From 5f352abc86998ec3d8499a5db506462552ae1064 Mon Sep 17 00:00:00 2001 From: Christian Hergert Date: Wed, 15 Sep 2021 17:52:58 -0700 Subject: [PATCH] do delayed path resolving of files containing symbols we still need to teach this to locate debug dirs relative to the process paths. --- src/libsysprof/sysprof-elf-symbol-resolver.c | 232 ++++++++++++++---- .../sysprof-symbol-resolver-private.h | 31 +++ src/libsysprof/sysprof-symbol-resolver.c | 53 +++- 3 files changed, 265 insertions(+), 51 deletions(-) create mode 100644 src/libsysprof/sysprof-symbol-resolver-private.h diff --git a/src/libsysprof/sysprof-elf-symbol-resolver.c b/src/libsysprof/sysprof-elf-symbol-resolver.c index 27a3b5fb..c555e056 100644 --- a/src/libsysprof/sysprof-elf-symbol-resolver.c +++ b/src/libsysprof/sysprof-elf-symbol-resolver.c @@ -20,6 +20,7 @@ #include "config.h" +#include #include #include "binfile.h" @@ -27,14 +28,34 @@ #include "sysprof-elf-symbol-resolver.h" #include "sysprof-flatpak.h" #include "sysprof-map-lookaside.h" +#include "sysprof-path-resolver.h" #include "sysprof-podman.h" +#include "sysprof-symbol-resolver-private.h" + +typedef struct +{ + char *on_host; + char *in_process; + int layer; +} ProcessOverlay; + +typedef struct +{ + SysprofMapLookaside *lookaside; + SysprofPathResolver *resolver; + GByteArray *mountinfo_data; + GArray *overlays; + int pid; +} ProcessInfo; struct _SysprofElfSymbolResolver { - GObject parent_instance; + GObject parent_instance; + + GHashTable *processes; + GStringChunk *chunks; GArray *debug_dirs; - GHashTable *lookasides; GHashTable *bin_files; GHashTable *tag_cache; }; @@ -48,6 +69,20 @@ G_DEFINE_TYPE_EXTENDED (SysprofElfSymbolResolver, G_IMPLEMENT_INTERFACE (SYSPROF_TYPE_SYMBOL_RESOLVER, symbol_resolver_iface_init)) +static void +process_info_free (gpointer data) +{ + ProcessInfo *pi = data; + + if (pi != NULL) + { + g_clear_pointer (&pi->mountinfo_data, g_byte_array_unref); + g_clear_pointer (&pi->resolver, _sysprof_path_resolver_free); + 
g_clear_pointer (&pi->lookaside, sysprof_map_lookaside_free); + g_slice_free (ProcessInfo, pi); + } +} + static gboolean is_flatpak (void) { @@ -69,9 +104,9 @@ sysprof_elf_symbol_resolver_finalize (GObject *object) SysprofElfSymbolResolver *self = (SysprofElfSymbolResolver *)object; g_clear_pointer (&self->bin_files, g_hash_table_unref); - g_clear_pointer (&self->lookasides, g_hash_table_unref); g_clear_pointer (&self->tag_cache, g_hash_table_unref); g_clear_pointer (&self->debug_dirs, g_array_unref); + g_clear_pointer (&self->processes, g_hash_table_unref); G_OBJECT_CLASS (sysprof_elf_symbol_resolver_parent_class)->finalize (object); } @@ -95,6 +130,8 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self) { g_auto(GStrv) podman_dirs = NULL; + self->processes = g_hash_table_new_full (NULL, NULL, NULL, process_info_free); + self->debug_dirs = g_array_new (TRUE, FALSE, sizeof (gchar *)); g_array_set_clear_func (self->debug_dirs, free_element_string); @@ -115,11 +152,6 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self) sysprof_elf_symbol_resolver_add_debug_dir (self, debug_dirs[i]); } - self->lookasides = g_hash_table_new_full (NULL, - NULL, - NULL, - (GDestroyNotify)sysprof_map_lookaside_free); - self->bin_files = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, @@ -128,63 +160,179 @@ sysprof_elf_symbol_resolver_init (SysprofElfSymbolResolver *self) self->tag_cache = g_hash_table_new_full (g_str_hash, g_str_equal, g_free, NULL); } +static ProcessInfo * +sysprof_elf_symbol_resolver_get_process (SysprofElfSymbolResolver *self, + int pid) +{ + ProcessInfo *pi; + + g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self)); + + if (!(pi = g_hash_table_lookup (self->processes, GINT_TO_POINTER (pid)))) + { + pi = g_slice_new0 (ProcessInfo); + pi->pid = pid; + g_hash_table_insert (self->processes, GINT_TO_POINTER (pid), pi); + } + + return pi; +} + static void sysprof_elf_symbol_resolver_load (SysprofSymbolResolver *resolver, 
SysprofCaptureReader *reader) { SysprofElfSymbolResolver *self = (SysprofElfSymbolResolver *)resolver; + static const guint8 zero[1] = {0}; SysprofCaptureFrameType type; + g_autoptr(GByteArray) mounts = NULL; + g_autofree char *mounts_data = NULL; + GHashTableIter iter; + gpointer k, v; - g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver)); + g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self)); g_assert (reader != NULL); + g_hash_table_remove_all (self->processes); + + /* First we need to load all the /proc/{pid}/mountinfo files so that + * we can discover what files within the processes filesystem namespace + * were mapped and where. We can use that information later to build + * path resolvers that let us locate the files from the host. + */ sysprof_capture_reader_reset (reader); + while (sysprof_capture_reader_peek_type (reader, &type)) + { + if (type == SYSPROF_CAPTURE_FRAME_FILE_CHUNK) + { + const SysprofCaptureFileChunk *ev; + ProcessInfo *pi; + int pid; - /* Start by finding mount/mountinfo for processes */ + if (!(ev = sysprof_capture_reader_read_file (reader))) + break; + if (g_str_has_prefix (ev->path, "/proc/") && + g_str_has_suffix (ev->path, "/mountinfo") && + sscanf (ev->path, "/proc/%u/mountinfo", &pid) == 1) + { + pi = sysprof_elf_symbol_resolver_get_process (self, pid); + if (pi->mountinfo_data == NULL) + pi->mountinfo_data = g_byte_array_new (); + if (ev->len) + g_byte_array_append (pi->mountinfo_data, ev->data, ev->len); + } + else if (g_str_equal (ev->path, "/proc/mounts")) + { + if (mounts == NULL) + mounts = g_byte_array_new (); + if (ev->len) + g_byte_array_append (mounts, ev->data, ev->len); + } + } + else if (type == SYSPROF_CAPTURE_FRAME_OVERLAY) + { + const SysprofCaptureOverlay *ev; + ProcessOverlay ov; + ProcessInfo *pi; + + if (!(ev = sysprof_capture_reader_read_overlay (reader))) + break; + + ov.on_host = g_string_chunk_insert_const (self->chunks, ev->data); + ov.in_process = g_string_chunk_insert_const (self->chunks, 
&ev->data[ev->src_len+1]); + ov.layer = ev->layer; + + pi = sysprof_elf_symbol_resolver_get_process (self, ev->frame.pid); + if (pi->overlays == NULL) + pi->overlays = g_array_new (FALSE, FALSE, sizeof (ProcessOverlay)); + g_array_append_val (pi->overlays, ov); + } + else + { + if (!sysprof_capture_reader_skip (reader)) + break; + } + } + + /* Now make sure we have access to /proc/mounts data. If we do not find it + * within the capture, assume we're running on the same host. + */ + if (mounts != NULL) + { + g_byte_array_append (mounts, zero, 1); + mounts_data = (char *)g_byte_array_free (g_steal_pointer (&mounts), FALSE); + } + + if (mounts_data == NULL) + g_file_get_contents ("/proc/mounts", &mounts_data, NULL, NULL); + + /* Now that we loaded all the mountinfo data, we can create path resolvers + * for each of the processes. Once we have that data we can walk the file + * again to load the map events. + */ + g_hash_table_iter_init (&iter, self->processes); + while (g_hash_table_iter_next (&iter, &k, &v)) + { + ProcessInfo *pi = v; + + if (pi->mountinfo_data == NULL) + continue; + + g_byte_array_append (pi->mountinfo_data, zero, 1); + + pi->resolver = _sysprof_path_resolver_new (mounts_data, + (const char *)pi->mountinfo_data->data); + + if (pi->overlays != NULL) + { + for (guint i = 0; i < pi->overlays->len; i++) + { + const ProcessOverlay *ov = &g_array_index (pi->overlays, ProcessOverlay, i); + _sysprof_path_resolver_add_overlay (pi->resolver, ov->in_process, ov->on_host, ov->layer); + } + } + } + + /* Walk through the file again and extract maps so long as + * we have a resolver for them already. 
+ */ + sysprof_capture_reader_reset (reader); while (sysprof_capture_reader_peek_type (reader, &type)) { if (type == SYSPROF_CAPTURE_FRAME_MAP) { const SysprofCaptureMap *ev = sysprof_capture_reader_read_map (reader); - SysprofMapLookaside *lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (ev->frame.pid)); const char *filename = ev->filename; + g_autofree char *resolved = NULL; + ProcessInfo *pi; SysprofMap map; + pi = sysprof_elf_symbol_resolver_get_process (self, ev->frame.pid); + + if (pi->resolver != NULL) + { + resolved = _sysprof_path_resolver_resolve (pi->resolver, filename); + + if (resolved) + filename = resolved; + } + map.start = ev->start; map.end = ev->end; map.offset = ev->offset; map.inode = ev->inode; map.filename = filename; - if (lookaside == NULL) - { - lookaside = sysprof_map_lookaside_new (); - g_hash_table_insert (self->lookasides, GINT_TO_POINTER (ev->frame.pid), lookaside); - } + if (pi->lookaside == NULL) + pi->lookaside = sysprof_map_lookaside_new (); - sysprof_map_lookaside_insert (lookaside, &map); - } - else if (type == SYSPROF_CAPTURE_FRAME_OVERLAY) - { - const SysprofCaptureOverlay *ev = sysprof_capture_reader_read_overlay (reader); - SysprofMapLookaside *lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (ev->frame.pid)); - const char *src = ev->data; - const char *dst = &ev->data[ev->src_len+1]; - - if (lookaside == NULL) - { - lookaside = sysprof_map_lookaside_new (); - g_hash_table_insert (self->lookasides, GINT_TO_POINTER (ev->frame.pid), lookaside); - } - - sysprof_map_lookaside_overlay (lookaside, src, dst); + sysprof_map_lookaside_insert (pi->lookaside, &map); } else { if (!sysprof_capture_reader_skip (reader)) return; - continue; } } } @@ -351,15 +499,13 @@ sysprof_elf_symbol_resolver_resolve_full (SysprofElfSymbolResolver *self, gchar **name, GQuark *tag) { - SysprofMapLookaside *lookaside; - const SysprofMapOverlay *overlays = NULL; const bin_symbol_t *bin_sym; const gchar *bin_sym_name; 
const SysprofMap *map; + ProcessInfo *pi; bin_file_t *bin_file; gulong ubegin; gulong uend; - guint n_overlays = 0; g_assert (SYSPROF_IS_ELF_SYMBOL_RESOLVER (self)); g_assert (name != NULL); @@ -371,24 +517,18 @@ sysprof_elf_symbol_resolver_resolve_full (SysprofElfSymbolResolver *self, if (context != SYSPROF_ADDRESS_CONTEXT_USER) return FALSE; - lookaside = g_hash_table_lookup (self->lookasides, GINT_TO_POINTER (pid)); - if G_UNLIKELY (lookaside == NULL) + if (!(pi = g_hash_table_lookup (self->processes, GINT_TO_POINTER (pid)))) return FALSE; - map = sysprof_map_lookaside_lookup (lookaside, address); + map = sysprof_map_lookaside_lookup (pi->lookaside, address); if G_UNLIKELY (map == NULL) return FALSE; address -= map->start; address += map->offset; - if (lookaside->overlays) - { - overlays = &g_array_index (lookaside->overlays, SysprofMapOverlay, 0); - n_overlays = lookaside->overlays->len; - } - - bin_file = sysprof_elf_symbol_resolver_get_bin_file (self, overlays, n_overlays, map->filename); + /* TODO: Get debugdirs for process */ + bin_file = sysprof_elf_symbol_resolver_get_bin_file (self, NULL, 0, map->filename); g_assert (bin_file != NULL); diff --git a/src/libsysprof/sysprof-symbol-resolver-private.h b/src/libsysprof/sysprof-symbol-resolver-private.h new file mode 100644 index 00000000..76cbe45e --- /dev/null +++ b/src/libsysprof/sysprof-symbol-resolver-private.h @@ -0,0 +1,31 @@ +/* sysprof-symbol-resolver-private.h + * + * Copyright 2021 Christian Hergert + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include +#include + +G_BEGIN_DECLS + +char *_sysprof_symbol_resolver_load_file (SysprofCaptureReader *reader, + const char *path); + +G_END_DECLS diff --git a/src/libsysprof/sysprof-symbol-resolver.c b/src/libsysprof/sysprof-symbol-resolver.c index 83da30a0..19e4140d 100644 --- a/src/libsysprof/sysprof-symbol-resolver.c +++ b/src/libsysprof/sysprof-symbol-resolver.c @@ -20,6 +20,7 @@ #include "config.h" +#include "sysprof-platform.h" #include "sysprof-symbol-resolver.h" G_DEFINE_INTERFACE (SysprofSymbolResolver, sysprof_symbol_resolver, G_TYPE_OBJECT) @@ -125,11 +126,11 @@ sysprof_symbol_resolver_resolve (SysprofSymbolResolver *self, */ gchar * sysprof_symbol_resolver_resolve_with_context (SysprofSymbolResolver *self, - guint64 time, - GPid pid, - SysprofAddressContext context, - SysprofCaptureAddress address, - GQuark *tag) + guint64 time, + GPid pid, + SysprofAddressContext context, + SysprofCaptureAddress address, + GQuark *tag) { GQuark dummy; @@ -142,3 +143,45 @@ sysprof_symbol_resolver_resolve_with_context (SysprofSymbolResolver *self, return SYSPROF_SYMBOL_RESOLVER_GET_IFACE (self)->resolve_with_context (self, time, pid, context, address, tag); } + +char * +_sysprof_symbol_resolver_load_file (SysprofCaptureReader *reader, + const char *path) +{ + g_autofree char *data = NULL; + goffset len; + goffset pos = 0; + int fd; + + g_assert (reader != NULL); + g_assert (path != NULL); + + sysprof_capture_reader_reset (reader); + + if (-1 == (fd = sysprof_memfd_create ("")) || + !sysprof_capture_reader_read_file_fd (reader, path, fd)) + { + if (fd != -1) + close (fd); + return NULL; + } + + len = lseek (fd, 0L, SEEK_CUR); + data = g_malloc (len + 1); + lseek (fd, 0L, SEEK_SET); + + while (pos < len) + { + gssize n_read = 
read (fd, data + pos, len - pos); + + if (n_read < 0) + return NULL; + + pos += n_read; + } + + data[len] = 0; + close (fd); + + return g_steal_pointer (&data); +}