diff --git a/src/libsysprof/meson.build b/src/libsysprof/meson.build index 1d60a12f..09aed85d 100644 --- a/src/libsysprof/meson.build +++ b/src/libsysprof/meson.build @@ -53,6 +53,7 @@ libsysprof_private_sources = [ 'sysprof-kallsyms.c', 'sysprof-line-reader.c', 'sysprof-map-lookaside.c', + 'sysprof-symbol-map.c', ipc_service_src, stackstash_sources, helpers_sources, diff --git a/src/libsysprof/sysprof-symbol-map.c b/src/libsysprof/sysprof-symbol-map.c new file mode 100644 index 00000000..9d6b0505 --- /dev/null +++ b/src/libsysprof/sysprof-symbol-map.c @@ -0,0 +1,509 @@ +/* sysprof-symbol-map.c + * + * Copyright 2019 Christian Hergert + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#define G_LOG_DOMAIN "sysprof-symbol-map" + +#include "config.h" + +#include + +#include "sysprof-map-lookaside.h" +#include "sysprof-symbol-map.h" + +/* + * Because we can't rely on the address ranges of symbols from ELF files + * or elsewhere, we have to duplicate a lot of entries when building this + * so that we can resolve all of the corrent addresses. + */ + +SYSPROF_ALIGNED_BEGIN(1) +typedef struct +{ + SysprofCaptureAddress addr_begin; + SysprofCaptureAddress addr_end; + guint32 pid; + guint32 offset; + guint32 tag_offset; + guint32 padding; +} Decoded +SYSPROF_ALIGNED_END(1); + +struct _SysprofSymbolMap +{ + /* For creating maps */ + GStringChunk *chunk; + GHashTable *lookasides; + GPtrArray *resolvers; + GPtrArray *samples; + guint resolved : 1; + + /* For reading maps */ + GMappedFile *mapped; + const Decoded *symbols; + gsize n_symbols; + const gchar *beginptr; + const gchar *endptr; +}; + +typedef struct +{ + SysprofCaptureAddress addr; + const gchar *name; + GQuark tag; + guint32 pid; +} Element; + +static void +element_free (Element *ele) +{ + g_slice_free (Element, ele); +} + +static gint +element_compare (gconstpointer a, + gconstpointer b) +{ + const Element *aa = *(const Element **)a; + const Element *bb = *(const Element **)b; + + if (aa->pid < bb->pid) + return -1; + + if (aa->pid > bb->pid) + return 1; + + if (aa->addr < bb->addr) + return -1; + + if (aa->addr > bb->addr) + return 1; + + return 0; +} + +static guint +element_hash (gconstpointer data) +{ + const Element *ele = data; + struct { + guint32 a; + guint32 b; + } addr; + + memcpy (&addr, &ele->addr, sizeof addr); + return addr.a ^ addr.b ^ ele->pid; +} + +static gboolean +element_equal (gconstpointer a, + gconstpointer b) +{ + const Element *aa = a; + const Element *bb = b; + + return aa->pid == bb->pid && aa->addr == bb->addr; +} + +SysprofSymbolMap * +sysprof_symbol_map_new (void) +{ + SysprofSymbolMap *self; + + self = g_slice_new0 (SysprofSymbolMap); + self->samples = g_ptr_array_new_with_free_func ((GDestroyNotify) element_free); + self->chunk = g_string_chunk_new (4096*16); + self->resolvers = g_ptr_array_new_with_free_func (g_object_unref); + self->lookasides = g_hash_table_new_full (NULL, NULL, NULL, + (GDestroyNotify) sysprof_map_lookaside_free); + + return g_steal_pointer (&self); +} + +void +sysprof_symbol_map_free (SysprofSymbolMap *self) +{ + g_clear_pointer (&self->lookasides, g_hash_table_unref); + g_clear_pointer (&self->resolvers, g_ptr_array_unref); + g_clear_pointer (&self->chunk, g_string_chunk_free); + g_clear_pointer (&self->samples, g_ptr_array_unref); + g_clear_pointer (&self->mapped, g_mapped_file_unref); + g_slice_free (SysprofSymbolMap, self); +} + +static gint +search_for_symbol_cb (gconstpointer a, + gconstpointer b) +{ + const Decoded *key = a; + const Decoded *ele = b; + + if (key->pid < ele->pid) + return -1; + + if (key->pid > ele->pid) + return 1; + + g_assert (key->pid == ele->pid); + + if (key->addr_begin < ele->addr_begin) + return -1; + + if (key->addr_begin > ele->addr_end) + return 1; + + g_assert (key->addr_begin >= ele->addr_begin); + g_assert (key->addr_end <= ele->addr_end); + + return 0; +} + +const gchar * +sysprof_symbol_map_lookup (SysprofSymbolMap *self, + gint64 time, + gint32 pid, + SysprofCaptureAddress addr, + GQuark *tag) +{ + const Decoded *ret; + const Decoded key = { + .addr_begin = addr, + .addr_end = addr, + .pid = pid, + .offset = 0, + .tag_offset = 0, + }; + + g_assert (self != NULL); + + if (tag != NULL) + *tag = 0; + + ret = bsearch (&key, + self->symbols, + self->n_symbols, + sizeof *ret, + search_for_symbol_cb); + + if (ret == NULL) + return NULL; + + if (tag != NULL && ret->tag_offset < (self->endptr - self->beginptr)) + *tag = g_quark_from_string (&self->beginptr[ret->tag_offset]); + + if (ret->offset < (self->endptr - self->beginptr)) + return &self->beginptr[ret->offset]; + + return NULL; +} + +void +sysprof_symbol_map_add_resolver (SysprofSymbolMap *self, + SysprofSymbolResolver *resolver) +{ + g_assert (self != NULL); + g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver)); + + g_ptr_array_add (self->resolvers, g_object_ref (resolver)); +} + +static void +sysprof_symbol_map_do_sample (SysprofSymbolMap *self, + SysprofCaptureReader *reader, + GHashTable *seen) +{ + SysprofAddressContext last_context = SYSPROF_ADDRESS_CONTEXT_NONE; + const SysprofCaptureSample *sample; + + g_assert (self != NULL); + g_assert (reader != NULL); + g_assert (seen != NULL); + + if (!(sample = sysprof_capture_reader_read_sample (reader))) + return; + + for (guint i = 0; i < sample->n_addrs; i++) + { + SysprofCaptureAddress addr = sample->addrs[i]; + SysprofAddressContext context; + + if (sysprof_address_is_context_switch (addr, &context)) + { + last_context = context; + continue; + } + + for (guint j = 0; j < self->resolvers->len; j++) + { + SysprofSymbolResolver *resolver = g_ptr_array_index (self->resolvers, j); + g_autofree gchar *name = NULL; + const gchar *cname; + Element ele; + GQuark tag = 0; + + name = sysprof_symbol_resolver_resolve_with_context (resolver, + sample->frame.time, + sample->frame.pid, + last_context, + addr, + &tag); + + if (name == NULL) + continue; + + cname = g_string_chunk_insert_const (self->chunk, name); + + ele.addr = addr; + ele.pid = sample->frame.pid; + ele.name = cname; + ele.tag = tag; + + if (!g_hash_table_contains (seen, &ele)) + { + Element *cpy = g_slice_dup (Element, &ele); + g_hash_table_add (seen, cpy); + g_ptr_array_add (self->samples, cpy); + } + } + } +} + +void +sysprof_symbol_map_resolve (SysprofSymbolMap *self, + SysprofCaptureReader *reader) +{ + g_autoptr(GHashTable) seen = NULL; + SysprofCaptureFrameType type; + + g_return_if_fail (self != NULL); + g_return_if_fail (self->resolved == FALSE); + g_return_if_fail (reader != NULL); + + self->resolved = TRUE; + + seen = g_hash_table_new (element_hash, element_equal); + + sysprof_capture_reader_reset (reader); + + for (guint i = 0; i < self->resolvers->len; i++) + { + sysprof_symbol_resolver_load (g_ptr_array_index (self->resolvers, i), reader); + sysprof_capture_reader_reset (reader); + } + + while (sysprof_capture_reader_peek_type (reader, &type)) + { + if (type == SYSPROF_CAPTURE_FRAME_SAMPLE) + { + sysprof_symbol_map_do_sample (self, reader, seen); + continue; + } + + if (!sysprof_capture_reader_skip (reader)) + break; + } + + g_ptr_array_sort (self->samples, element_compare); +} + +void +sysprof_symbol_map_printf (SysprofSymbolMap *self) +{ + g_return_if_fail (self != NULL); + g_return_if_fail (self->samples != NULL); + + for (guint i = 0; i < self->samples->len; i++) + { + Element *ele = g_ptr_array_index (self->samples, i); + + if (ele->tag) + g_print ("%-5d: %p: %s [%s]\n", ele->pid, (gpointer)ele->addr, ele->name, g_quark_to_string (ele->tag)); + else + g_print ("%-5d: %p: %s\n", ele->pid, (gpointer)ele->addr, ele->name); + } +} + +static guint +get_string_offset (GByteArray *ar, + GHashTable *seen, + const gchar *str) +{ + gpointer ret; + + if G_UNLIKELY (!g_hash_table_lookup_extended (seen, str, NULL, &ret)) + { + ret = GUINT_TO_POINTER (ar->len); + g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1); + g_hash_table_insert (seen, (gpointer)str, ret); + } + + return GPOINTER_TO_UINT (ret); +} + +gboolean +sysprof_symbol_map_serialize (SysprofSymbolMap *self, + gint fd) +{ + static const Decoded empty = {0}; + SysprofCaptureAddress begin = 0; + g_autoptr(GByteArray) ar = NULL; + g_autoptr(GHashTable) seen = NULL; + g_autoptr(GArray) decoded = NULL; + gsize offset; + + g_assert (self != NULL); + g_assert (fd != -1); + + ar = g_byte_array_new (); + seen = g_hash_table_new (NULL, NULL); + decoded = g_array_new (FALSE, FALSE, sizeof (Decoded)); + + /* Add some empty space to both give us non-zero offsets and also ensure + * empty space between data. + */ + g_byte_array_append (ar, (guint8 *)&empty, sizeof empty); + + for (guint i = 0; i < self->samples->len; i++) + { + Element *ele = g_ptr_array_index (self->samples, i); + + if (!g_hash_table_contains (seen, ele->name)) + { + const gchar *str = ele->name; + gpointer ptr = GUINT_TO_POINTER (ar->len); + g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1); + g_hash_table_insert (seen, (gpointer)str, ptr); + } + } + + for (guint i = 0; i < self->samples->len; i++) + { + Element *ele = g_ptr_array_index (self->samples, i); + Decoded dec; + + if (begin == 0) + begin = ele->addr; + + if ((i + 1) < self->samples->len) + { + Element *next = g_ptr_array_index (self->samples, i + 1); + + if (ele->pid == next->pid && ele->name == next->name) + continue; + } + + dec.padding = 0; + dec.addr_begin = begin; + dec.addr_end = ele->addr; + dec.pid = ele->pid; + dec.offset = get_string_offset (ar, seen, ele->name); + + if (ele->tag) + dec.tag_offset = get_string_offset (ar, seen, g_quark_to_string (ele->tag)); + else + dec.tag_offset = 0; + + g_array_append_val (decoded, dec); + + begin = 0; + } + + offset = sizeof empty * decoded->len; + + for (guint i = 0; i < decoded->len; i++) + { + Decoded *dec = &g_array_index (decoded, Decoded, i); + + if (dec->offset) + dec->offset += offset; + + if (dec->tag_offset) + dec->tag_offset += offset; + } + + if (write (fd, decoded->data, offset) != offset) + return FALSE; + + if (write (fd, ar->data, ar->len) != ar->len) + return FALSE; + + /* Aggressively release state now that we're finished */ + if (self->samples->len) + g_ptr_array_remove_range (self->samples, 0, self->samples->len); + if (self->resolvers != NULL) + g_ptr_array_remove_range (self->resolvers, 0, self->resolvers->len); + g_string_chunk_clear (self->chunk); + g_hash_table_remove_all (self->lookasides); + + return TRUE; +} + +gboolean +sysprof_symbol_map_deserialize (SysprofSymbolMap *self, + gint byte_order, + gint fd) +{ + gboolean needs_swap = byte_order != G_BYTE_ORDER; + gchar *beginptr; + gchar *endptr; + + g_return_val_if_fail (self != NULL, FALSE); + g_return_val_if_fail (self->mapped == NULL, FALSE); + + if (!(self->mapped = g_mapped_file_new_from_fd (fd, TRUE, NULL))) + return FALSE; + + beginptr = g_mapped_file_get_contents (self->mapped); + endptr = beginptr + g_mapped_file_get_length (self->mapped); + + for (gchar *ptr = beginptr; + ptr < endptr && (ptr + sizeof (Decoded)) < endptr; + ptr += sizeof (Decoded)) + { + Decoded *sym = (Decoded *)ptr; + + if (sym->addr_begin == 0 && + sym->addr_end == 0 && + sym->pid == 0 && + sym->offset == 0) + { + self->symbols = (const Decoded *)beginptr; + self->n_symbols = sym - self->symbols; + break; + } + else if (needs_swap) + { + sym->addr_begin = GUINT64_SWAP_LE_BE (sym->addr_begin); + sym->addr_end = GUINT64_SWAP_LE_BE (sym->addr_end); + sym->pid = GUINT32_SWAP_LE_BE (sym->pid); + sym->offset = GUINT32_SWAP_LE_BE (sym->offset); + sym->tag_offset = GUINT32_SWAP_LE_BE (sym->tag_offset); + } + +#if 0 + g_print ("Added pid=%d begin=%p end=%p\n", + sym->pid, (gpointer)sym->begin, (gpointer)sym->end); +#endif + } + + self->beginptr = beginptr; + self->endptr = endptr; + + return TRUE; +} diff --git a/src/libsysprof/sysprof-symbol-map.h b/src/libsysprof/sysprof-symbol-map.h new file mode 100644 index 00000000..2e482c2c --- /dev/null +++ b/src/libsysprof/sysprof-symbol-map.h @@ -0,0 +1,49 @@ +/* sysprof-symbol-map.h + * + * Copyright 2019 Christian Hergert + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include + +#include "sysprof-symbol-resolver.h" + +G_BEGIN_DECLS + +typedef struct _SysprofSymbolMap SysprofSymbolMap; + +SysprofSymbolMap *sysprof_symbol_map_new (void); +void sysprof_symbol_map_add_resolver (SysprofSymbolMap *self, + SysprofSymbolResolver *resolver); +void sysprof_symbol_map_resolve (SysprofSymbolMap *self, + SysprofCaptureReader *reader); +const gchar *sysprof_symbol_map_lookup (SysprofSymbolMap *self, + gint64 time, + gint32 pid, + SysprofCaptureAddress addr, + GQuark *tag); +void sysprof_symbol_map_printf (SysprofSymbolMap *self); +gboolean sysprof_symbol_map_serialize (SysprofSymbolMap *self, + gint fd); +gboolean sysprof_symbol_map_deserialize (SysprofSymbolMap *self, + gint byte_order, + gint fd); +void sysprof_symbol_map_free (SysprofSymbolMap *self); + +G_END_DECLS diff --git a/src/tests/test-addr-map.c b/src/tests/test-addr-map.c new file mode 100644 index 00000000..c1867e0f --- /dev/null +++ b/src/tests/test-addr-map.c @@ -0,0 +1,105 @@ +#include +#include + +#include "sysprof-platform.h" +#include "sysprof-symbol-map.h" + +static GMainLoop *main_loop; + +static void * +resolve_in_thread (gpointer data) +{ + SysprofCaptureReader *reader = data; + g_autoptr(SysprofSymbolResolver) kernel = NULL; + g_autoptr(SysprofSymbolResolver) elf = NULL; + SysprofCaptureFrameType type; + SysprofSymbolMap *map; + gboolean r; + int fd; + + g_assert (reader != NULL); + + map = sysprof_symbol_map_new (); + kernel = sysprof_kernel_symbol_resolver_new (); + elf = sysprof_elf_symbol_resolver_new (); + + sysprof_symbol_map_add_resolver (map, kernel); + sysprof_symbol_map_add_resolver (map, elf); + + sysprof_symbol_map_resolve (map, reader); + + fd = sysprof_memfd_create ("decode-test"); + g_assert_cmpint (fd, !=, -1); + + r = sysprof_symbol_map_serialize (map, fd); + g_assert_true (r); + sysprof_symbol_map_free (map); + + /* Reset some state */ + sysprof_capture_reader_reset (reader); + lseek (fd, SEEK_SET, 0); + + /* Now desrialize it */ + map = sysprof_symbol_map_new (); + sysprof_symbol_map_deserialize (map, G_BYTE_ORDER, fd); + + /* Now try to print some stack traces */ + while (sysprof_capture_reader_peek_type (reader, &type)) + { + if (type == SYSPROF_CAPTURE_FRAME_SAMPLE) + { + const SysprofCaptureSample *sample = NULL; + + if (!(sample = sysprof_capture_reader_read_sample (reader))) + break; + + for (guint j = 0; j < sample->n_addrs; j++) + { + const gchar *name; + GQuark tag; + + if (!(name = sysprof_symbol_map_lookup (map, sample->frame.time, sample->frame.pid, sample->addrs[j], &tag))) + name = "Unknown symbol"; + + g_print ("%u: %s\n", j, name); + } + + g_print ("======\n"); + } + else if (!sysprof_capture_reader_skip (reader)) + break; + } + + + sysprof_symbol_map_free (map); + + close (fd); + g_main_loop_quit (main_loop); + return NULL; +} + +gint +main (gint argc, + gchar *argv[]) +{ + g_autoptr(SysprofCaptureReader) reader = NULL; + g_autoptr(GError) error = NULL; + + if (argc != 2) + { + g_printerr ("usage: %s CAPTURE_FILE\n", argv[0]); + return 1; + } + + if (!(reader = sysprof_capture_reader_new (argv[1], &error))) + { + g_printerr ("%s\n", error->message); + return 1; + } + + main_loop = g_main_loop_new (NULL, FALSE); + g_thread_new ("reader-thread", resolve_in_thread, reader); + g_main_loop_run (main_loop); + + return 0; +}