From b625fec4545fc2d7eb9682b06733141b4171ee77 Mon Sep 17 00:00:00 2001 From: Christian Hergert Date: Tue, 9 May 2023 13:29:52 -0700 Subject: [PATCH] libsysprof-analyze: add basic symbolize API This isn't what it will look like in final form, just get the minimum there for us to use it with the bundled decoder (copied from libsysprof with adaptations). --- .../sysprof-bundled-symbolizer.c | 138 +++++++++++++++++- .../sysprof-document-symbols.c | 12 ++ .../sysprof-symbolizer-private.h | 41 ++++-- src/libsysprof-analyze/sysprof-symbolizer.c | 9 ++ 4 files changed, 183 insertions(+), 17 deletions(-) diff --git a/src/libsysprof-analyze/sysprof-bundled-symbolizer.c b/src/libsysprof-analyze/sysprof-bundled-symbolizer.c index cbe8e239..997c8b49 100644 --- a/src/libsysprof-analyze/sysprof-bundled-symbolizer.c +++ b/src/libsysprof-analyze/sysprof-bundled-symbolizer.c @@ -23,10 +23,30 @@ #include "sysprof-bundled-symbolizer.h" #include "sysprof-document-private.h" #include "sysprof-symbolizer-private.h" +#include "sysprof-symbol-private.h" + +SYSPROF_ALIGNED_BEGIN(1) +typedef struct +{ + SysprofCaptureAddress addr_begin; + SysprofCaptureAddress addr_end; + guint32 pid; + guint32 offset; + guint32 tag_offset; + guint32 padding; +} Decoded +SYSPROF_ALIGNED_END(1); struct _SysprofBundledSymbolizer { - SysprofSymbolizer parent_instance; + SysprofSymbolizer parent_instance; + + const Decoded *symbols; + guint n_symbols; + + GBytes *bytes; + const gchar *beginptr; + const gchar *endptr; }; struct _SysprofBundledSymbolizerClass @@ -41,9 +61,45 @@ sysprof_bundled_symbolizer_decode (SysprofBundledSymbolizer *self, GBytes *bytes, gboolean is_native) { + char *beginptr; + char *endptr; + g_assert (SYSPROF_IS_BUNDLED_SYMBOLIZER (self)); g_assert (bytes != NULL); + /* Our GBytes always contain a trailing \0 after what we think + * is the end of the buffer. 
+ */ + beginptr = (char *)g_bytes_get_data (bytes, NULL); + endptr = beginptr + g_bytes_get_size (bytes); + + for (gchar *ptr = beginptr; + ptr < endptr && (ptr + sizeof (Decoded)) < endptr; + ptr += sizeof (Decoded)) + { + Decoded *sym = (Decoded *)ptr; + + if (sym->addr_begin == 0 && + sym->addr_end == 0 && + sym->pid == 0 && + sym->offset == 0) + { + self->symbols = (const Decoded *)beginptr; + self->n_symbols = sym - self->symbols; + break; + } + else if (!is_native) + { + sym->addr_begin = GUINT64_SWAP_LE_BE (sym->addr_begin); + sym->addr_end = GUINT64_SWAP_LE_BE (sym->addr_end); + sym->pid = GUINT32_SWAP_LE_BE (sym->pid); + sym->offset = GUINT32_SWAP_LE_BE (sym->offset); + sym->tag_offset = GUINT32_SWAP_LE_BE (sym->tag_offset); + } + } + + self->beginptr = beginptr; + self->endptr = endptr; } static void @@ -84,9 +140,88 @@ sysprof_bundled_symbolizer_prepare_finish (SysprofSymbolizer *symbolizer, return g_task_propagate_boolean (G_TASK (result), error); } +static gint +search_for_symbol_cb (gconstpointer a, + gconstpointer b) +{ + const Decoded *key = a; + const Decoded *ele = b; + + if (key->pid < ele->pid) + return -1; + + if (key->pid > ele->pid) + return 1; + + g_assert (key->pid == ele->pid); + + if (key->addr_begin < ele->addr_begin) + return -1; + + if (key->addr_begin > ele->addr_end) + return 1; + + g_assert (key->addr_begin >= ele->addr_begin); + g_assert (key->addr_end <= ele->addr_end); + + return 0; +} + +static SysprofSymbol * +sysprof_bundled_symbolizer_symbolize (SysprofSymbolizer *symbolizer, + gint64 time, + int pid, + SysprofAddress address) +{ + SysprofBundledSymbolizer *self = SYSPROF_BUNDLED_SYMBOLIZER (symbolizer); + g_autoptr(GRefString) tag = NULL; + const Decoded *ret; + const Decoded key = { + .addr_begin = address, + .addr_end = address, + .pid = pid, + .offset = 0, + .tag_offset = 0, + }; + + if (self->n_symbols == 0) + return NULL; + + ret = bsearch (&key, + self->symbols, + self->n_symbols, + sizeof *ret, + 
search_for_symbol_cb); + + if (ret == NULL || ret->offset == 0) + return NULL; + + if (ret->tag_offset > 0) + { + if (ret->tag_offset < (self->endptr - self->beginptr)) + tag = g_ref_string_new (&self->beginptr[ret->tag_offset]); + } + + if (ret->offset < (self->endptr - self->beginptr)) + return _sysprof_symbol_new (g_ref_string_new (&self->beginptr[ret->offset]), + g_steal_pointer (&tag), + NULL); + + return NULL; +} + static void sysprof_bundled_symbolizer_finalize (GObject *object) { + SysprofBundledSymbolizer *self = (SysprofBundledSymbolizer *)object; + + self->symbols = NULL; + self->n_symbols = 0; + self->beginptr = NULL; + self->endptr = NULL; + + g_clear_pointer (&self->bytes, g_bytes_unref); + G_OBJECT_CLASS (sysprof_bundled_symbolizer_parent_class)->finalize (object); } @@ -100,6 +235,7 @@ sysprof_bundled_symbolizer_class_init (SysprofBundledSymbolizerClass *klass) symbolizer_class->prepare_async = sysprof_bundled_symbolizer_prepare_async; symbolizer_class->prepare_finish = sysprof_bundled_symbolizer_prepare_finish; + symbolizer_class->symbolize = sysprof_bundled_symbolizer_symbolize; } static void diff --git a/src/libsysprof-analyze/sysprof-document-symbols.c b/src/libsysprof-analyze/sysprof-document-symbols.c index 16890562..a275d2ac 100644 --- a/src/libsysprof-analyze/sysprof-document-symbols.c +++ b/src/libsysprof-analyze/sysprof-document-symbols.c @@ -83,11 +83,16 @@ sysprof_document_symbols_add_traceable (SysprofDocumentSymbols *self, SysprofAddressContext last_context; guint64 *addresses; guint n_addresses; + gint64 time; + int pid; g_assert (SYSPROF_IS_DOCUMENT_SYMBOLS (self)); g_assert (SYSPROF_IS_DOCUMENT_TRACEABLE (traceable)); g_assert (SYSPROF_IS_SYMBOLIZER (symbolizer)); + time = sysprof_document_frame_get_time (SYSPROF_DOCUMENT_FRAME (traceable)); + pid = sysprof_document_frame_get_pid (SYSPROF_DOCUMENT_FRAME (traceable)); + /* TODO: We need to get the SysprofMountNamespace for the PID which must have * already been compiled. 
We also need the list SysprofDocumentMmap so that we * can get the build-id or inode to do various validation checks. @@ -119,6 +124,13 @@ sysprof_document_symbols_add_traceable (SysprofDocumentSymbols *self, } else { + g_autoptr(SysprofSymbol) symbol = _sysprof_symbolizer_symbolize (symbolizer, time, pid, address); + + /* TODO: This isn't the API we'll use for symbolizing, it just gets + * some plumbing in place. Additionally, we'll probably cache all these + * values here so that we can skip calling the symbolizer at all for + * subsequent symbols within a given range. + */ } } } diff --git a/src/libsysprof-analyze/sysprof-symbolizer-private.h b/src/libsysprof-analyze/sysprof-symbolizer-private.h index c70a110c..0416fb29 100644 --- a/src/libsysprof-analyze/sysprof-symbolizer-private.h +++ b/src/libsysprof-analyze/sysprof-symbolizer-private.h @@ -21,6 +21,7 @@ #pragma once #include "sysprof-document.h" +#include "sysprof-symbol.h" #include "sysprof-symbolizer.h" G_BEGIN_DECLS @@ -36,24 +37,32 @@ struct _SysprofSymbolizerClass { GObjectClass parent_class; - void (*prepare_async) (SysprofSymbolizer *self, - SysprofDocument *document, - GCancellable *cancellable, - GAsyncReadyCallback callback, - gpointer user_data); - gboolean (*prepare_finish) (SysprofSymbolizer *self, - GAsyncResult *result, - GError **error); + void (*prepare_async) (SysprofSymbolizer *self, + SysprofDocument *document, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); + gboolean (*prepare_finish) (SysprofSymbolizer *self, + GAsyncResult *result, + GError **error); + SysprofSymbol *(*symbolize) (SysprofSymbolizer *self, + gint64 time, + int pid, + SysprofAddress address); }; -void _sysprof_symbolizer_prepare_async (SysprofSymbolizer *self, - SysprofDocument *document, - GCancellable *cancellable, - GAsyncReadyCallback callback, - gpointer user_data); -gboolean _sysprof_symbolizer_prepare_finish (SysprofSymbolizer *self, - GAsyncResult *result, - GError **error); 
+void _sysprof_symbolizer_prepare_async (SysprofSymbolizer *self, + SysprofDocument *document, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); +gboolean _sysprof_symbolizer_prepare_finish (SysprofSymbolizer *self, + GAsyncResult *result, + GError **error); +SysprofSymbol *_sysprof_symbolizer_symbolize (SysprofSymbolizer *self, + gint64 time, + int pid, + SysprofAddress address); G_END_DECLS diff --git a/src/libsysprof-analyze/sysprof-symbolizer.c b/src/libsysprof-analyze/sysprof-symbolizer.c index 8f494903..904d08ae 100644 --- a/src/libsysprof-analyze/sysprof-symbolizer.c +++ b/src/libsysprof-analyze/sysprof-symbolizer.c @@ -89,3 +89,12 @@ _sysprof_symbolizer_prepare_finish (SysprofSymbolizer *self, return SYSPROF_SYMBOLIZER_GET_CLASS (self)->prepare_finish (self, result, error); } + +SysprofSymbol * +_sysprof_symbolizer_symbolize (SysprofSymbolizer *self, + gint64 time, + int pid, + SysprofAddress address) +{ + return SYSPROF_SYMBOLIZER_GET_CLASS (self)->symbolize (self, time, pid, address); +}