From dbabe87b3dcb50654b408df60f6b32e3bffbe35d Mon Sep 17 00:00:00 2001 From: Christian Hergert Date: Mon, 17 Jul 2023 17:13:30 -0700 Subject: [PATCH] libsysprof-analyze: add API to serialize symbol cache This will allow us to put the symbol cache back in the __symbols__ file like we did for online symbol resolution in previous versions. --- src/libsysprof-analyze/meson.build | 1 + .../sysprof-bundled-symbolizer-private.h | 39 +++++ .../sysprof-bundled-symbolizer.c | 44 +++--- .../sysprof-document-loader.c | 4 + src/libsysprof-analyze/sysprof-document.c | 135 ++++++++++++++++++ src/libsysprof-analyze/sysprof-document.h | 9 ++ .../sysprof-symbol-cache-private.h | 15 +- src/libsysprof-analyze/sysprof-symbol-cache.c | 48 +++++++ 8 files changed, 262 insertions(+), 33 deletions(-) create mode 100644 src/libsysprof-analyze/sysprof-bundled-symbolizer-private.h diff --git a/src/libsysprof-analyze/meson.build b/src/libsysprof-analyze/meson.build index c2bd9c93..8488c761 100644 --- a/src/libsysprof-analyze/meson.build +++ b/src/libsysprof-analyze/meson.build @@ -95,6 +95,7 @@ libsysprof_analyze_private_sources = [ libsysprof_analyze_deps = [ dependency('gio-2.0', version: glib_req_version), + dependency('libdex-1', version: dex_req_version), libeggbitset_static_dep, libelfparser_static_dep, diff --git a/src/libsysprof-analyze/sysprof-bundled-symbolizer-private.h b/src/libsysprof-analyze/sysprof-bundled-symbolizer-private.h new file mode 100644 index 00000000..086d4eaa --- /dev/null +++ b/src/libsysprof-analyze/sysprof-bundled-symbolizer-private.h @@ -0,0 +1,39 @@ +/* sysprof-bundled-symbolizer-private.h + * + * Copyright 2023 Christian Hergert + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * SPDX-License-Identifier: GPL-3.0-or-later + */ + +#pragma once + +#include "sysprof-bundled-symbolizer.h" + +G_BEGIN_DECLS + +SYSPROF_ALIGNED_BEGIN(1) +typedef struct _SysprofPackedSymbol +{ + SysprofCaptureAddress addr_begin; + SysprofCaptureAddress addr_end; + guint32 pid; + guint32 offset; + guint32 tag_offset; + guint32 padding; +} SysprofPackedSymbol +SYSPROF_ALIGNED_END(1); + +G_END_DECLS diff --git a/src/libsysprof-analyze/sysprof-bundled-symbolizer.c b/src/libsysprof-analyze/sysprof-bundled-symbolizer.c index 5893d32b..421f9f43 100644 --- a/src/libsysprof-analyze/sysprof-bundled-symbolizer.c +++ b/src/libsysprof-analyze/sysprof-bundled-symbolizer.c @@ -20,33 +20,21 @@ #include "config.h" -#include "sysprof-bundled-symbolizer.h" +#include "sysprof-bundled-symbolizer-private.h" #include "sysprof-document-private.h" #include "sysprof-symbolizer-private.h" #include "sysprof-symbol-private.h" -SYSPROF_ALIGNED_BEGIN(1) -typedef struct -{ - SysprofCaptureAddress addr_begin; - SysprofCaptureAddress addr_end; - guint32 pid; - guint32 offset; - guint32 tag_offset; - guint32 padding; -} Decoded -SYSPROF_ALIGNED_END(1); - struct _SysprofBundledSymbolizer { - SysprofSymbolizer parent_instance; + SysprofSymbolizer parent_instance; - const Decoded *symbols; - guint n_symbols; + const SysprofPackedSymbol *symbols; + guint n_symbols; - GBytes *bytes; - const gchar *beginptr; - const gchar *endptr; + GBytes *bytes; + const gchar *beginptr; + const gchar *endptr; }; struct _SysprofBundledSymbolizerClass @@ -74,17 +62,17 @@ sysprof_bundled_symbolizer_decode (SysprofBundledSymbolizer *self, endptr = 
beginptr + g_bytes_get_size (bytes); for (char *ptr = beginptr; - ptr < endptr && (ptr + sizeof (Decoded)) < endptr; - ptr += sizeof (Decoded)) + ptr < endptr && (ptr + sizeof (SysprofPackedSymbol)) < endptr; + ptr += sizeof (SysprofPackedSymbol)) { - Decoded *sym = (Decoded *)ptr; + SysprofPackedSymbol *sym = (SysprofPackedSymbol *)ptr; if (sym->addr_begin == 0 && sym->addr_end == 0 && sym->pid == 0 && sym->offset == 0) { - self->symbols = (const Decoded *)beginptr; + self->symbols = (const SysprofPackedSymbol *)beginptr; self->n_symbols = sym - self->symbols; break; } @@ -145,8 +133,8 @@ static gint search_for_symbol_cb (gconstpointer a, gconstpointer b) { - const Decoded *key = a; - const Decoded *ele = b; + const SysprofPackedSymbol *key = a; + const SysprofPackedSymbol *ele = b; if (key->pid < ele->pid) return -1; @@ -177,8 +165,8 @@ sysprof_bundled_symbolizer_symbolize (SysprofSymbolizer *symbolizer, { SysprofBundledSymbolizer *self = SYSPROF_BUNDLED_SYMBOLIZER (symbolizer); g_autoptr(GRefString) tag = NULL; - const Decoded *ret; - const Decoded key = { + const SysprofPackedSymbol *ret; + const SysprofPackedSymbol key = { .addr_begin = address, .addr_end = address, .pid = process_info ? 
process_info->pid : 0, @@ -195,7 +183,7 @@ sysprof_bundled_symbolizer_symbolize (SysprofSymbolizer *symbolizer, ret = bsearch (&key, self->symbols, self->n_symbols, - sizeof (Decoded), + sizeof (SysprofPackedSymbol), search_for_symbol_cb); if (ret == NULL || ret->offset == 0) diff --git a/src/libsysprof-analyze/sysprof-document-loader.c b/src/libsysprof-analyze/sysprof-document-loader.c index ccdf7a19..6622f325 100644 --- a/src/libsysprof-analyze/sysprof-document-loader.c +++ b/src/libsysprof-analyze/sysprof-document-loader.c @@ -27,6 +27,7 @@ #include #include "sysprof-bundled-symbolizer.h" +#include "sysprof-document-bitset-index-private.h" #include "sysprof-document-loader.h" #include "sysprof-document-private.h" #include "sysprof-elf-symbolizer.h" @@ -290,6 +291,9 @@ sysprof_document_loader_class_init (SysprofDocumentLoaderClass *klass) (G_PARAM_READWRITE | G_PARAM_EXPLICIT_NOTIFY | G_PARAM_STATIC_STRINGS)); g_object_class_install_properties (object_class, N_PROPS, properties); + + g_type_ensure (SYSPROF_TYPE_DOCUMENT); + g_type_ensure (SYSPROF_TYPE_DOCUMENT_BITSET_INDEX); } static void diff --git a/src/libsysprof-analyze/sysprof-document.c b/src/libsysprof-analyze/sysprof-document.c index 684c0132..6984e251 100644 --- a/src/libsysprof-analyze/sysprof-document.c +++ b/src/libsysprof-analyze/sysprof-document.c @@ -25,8 +25,11 @@ #include #include +#include + #include "sysprof-document-private.h" +#include "sysprof-bundled-symbolizer-private.h" #include "sysprof-callgraph-private.h" #include "sysprof-cpu-info-private.h" #include "sysprof-document-bitset-index-private.h" @@ -2319,3 +2322,135 @@ sysprof_document_list_cpu_info (SysprofDocument *self) return g_object_ref (G_LIST_MODEL (self->cpu_info)); } + +static int +sort_symbols_for_bsearch (gconstpointer a, + gconstpointer b) +{ + const SysprofPackedSymbol *packed_a = a; + const SysprofPackedSymbol *packed_b = b; + + if (packed_a->pid < packed_b->pid) + return -1; + + if (packed_a->pid > packed_b->pid) + return 
1; + + if (packed_a->addr_begin < packed_b->addr_begin) + return -1; + + if (packed_a->addr_begin > packed_b->addr_begin) + return 1; + + return 0; +} + +static DexFuture * +sysprof_document_serialize_symbols_fiber (gpointer user_data) +{ + static const guint8 empty_string[1] = {0}; + static const SysprofPackedSymbol empty_symbol = {0}; + SysprofDocument *self = user_data; + g_autoptr(GByteArray) strings = NULL; + g_autoptr(GHashTable) strings_offset = NULL; + g_autoptr(GBytes) bytes = NULL; + g_autoptr(GArray) packed_symbols = NULL; + GHashTableIter iter; + gpointer value; + char *data; + gsize packed_len; + + g_assert (SYSPROF_IS_DOCUMENT (self)); + + packed_symbols = g_array_new (FALSE, FALSE, sizeof (SysprofPackedSymbol)); + strings = g_byte_array_new (); + strings_offset = g_hash_table_new (g_str_hash, g_str_equal); + + /* Always put empty string at head so 0 is never a valid + * offset for the document entries except for "". + */ + g_byte_array_append (strings, empty_string, sizeof empty_string); + g_hash_table_insert (strings_offset, "", 0); + + g_hash_table_iter_init (&iter, self->pid_to_process_info); + while (g_hash_table_iter_next (&iter, NULL, &value)) + { + const SysprofProcessInfo *process_info = value; + + if (process_info->symbol_cache != NULL) + sysprof_symbol_cache_populate_packed (process_info->symbol_cache, + packed_symbols, + strings, + strings_offset, + process_info->pid); + } + + g_array_sort (packed_symbols, sort_symbols_for_bsearch); + g_array_append_val (packed_symbols, empty_symbol); + + packed_len = sizeof (SysprofPackedSymbol) * packed_symbols->len; + + /* Update the offsets to be relative to the beginning of the + * section containing our packed symbols. Ignore the last symbol + * so that it stays all zero. 
+ */ + for (guint i = 0; i < packed_symbols->len-1; i++) + { + g_array_index (packed_symbols, SysprofPackedSymbol, i).offset += packed_len; + g_array_index (packed_symbols, SysprofPackedSymbol, i).tag_offset += packed_len; + } + + if (G_MAXSSIZE - packed_len < strings->len) + return dex_future_new_for_errno (ENOMEM); + + data = g_malloc (packed_len + strings->len); + memcpy (data, packed_symbols->data, packed_len); + memcpy (data+packed_len, strings->data, strings->len); + + bytes = g_bytes_new_take (data, packed_len + strings->len); + + return dex_future_new_take_boxed (G_TYPE_BYTES, g_steal_pointer (&bytes)); +} + +void +sysprof_document_serialize_symbols_async (SysprofDocument *self, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data) +{ + g_autoptr(DexAsyncResult) result = NULL; + + g_return_if_fail (SYSPROF_IS_DOCUMENT (self)); + g_return_if_fail (!cancellable || G_IS_CANCELLABLE (cancellable)); + + result = dex_async_result_new (self, cancellable, callback, user_data); + + dex_async_result_await (result, + dex_scheduler_spawn (dex_thread_pool_scheduler_get_default (), 0, + sysprof_document_serialize_symbols_fiber, + g_object_ref (self), + g_object_unref)); +} + +/** + * sysprof_document_serialize_symbols_finish: + * @self: a #SysprofDocument + * @result: a #GAsyncResult + * @error: a location for a #GError + * + * Completes a request to serialize the symbols of the document + * encoded in a format that Sysprof understands. + * + * Returns: (transfer full): a #GBytes if successful; otherwise %NULL + * and @error is set. 
+ */ +GBytes * +sysprof_document_serialize_symbols_finish (SysprofDocument *self, + GAsyncResult *result, + GError **error) +{ + g_return_val_if_fail (SYSPROF_IS_DOCUMENT (self), NULL); + g_return_val_if_fail (DEX_IS_ASYNC_RESULT (result), NULL); + + return dex_async_result_propagate_pointer (DEX_ASYNC_RESULT (result), error); +} diff --git a/src/libsysprof-analyze/sysprof-document.h b/src/libsysprof-analyze/sysprof-document.h index a8e93581..2eb85fa0 100644 --- a/src/libsysprof-analyze/sysprof-document.h +++ b/src/libsysprof-analyze/sysprof-document.h @@ -105,5 +105,14 @@ SYSPROF_AVAILABLE_IN_ALL SysprofCallgraph *sysprof_document_callgraph_finish (SysprofDocument *self, GAsyncResult *result, GError **error); +SYSPROF_AVAILABLE_IN_ALL +void sysprof_document_serialize_symbols_async (SysprofDocument *self, + GCancellable *cancellable, + GAsyncReadyCallback callback, + gpointer user_data); +SYSPROF_AVAILABLE_IN_ALL +GBytes *sysprof_document_serialize_symbols_finish (SysprofDocument *self, + GAsyncResult *result, + GError **error); G_END_DECLS diff --git a/src/libsysprof-analyze/sysprof-symbol-cache-private.h b/src/libsysprof-analyze/sysprof-symbol-cache-private.h index c3842b52..4c96130f 100644 --- a/src/libsysprof-analyze/sysprof-symbol-cache-private.h +++ b/src/libsysprof-analyze/sysprof-symbol-cache-private.h @@ -30,10 +30,15 @@ G_BEGIN_DECLS G_DECLARE_FINAL_TYPE (SysprofSymbolCache, sysprof_symbol_cache, SYSPROF, SYMBOL_CACHE, GObject) -SysprofSymbolCache *sysprof_symbol_cache_new (void); -SysprofSymbol *sysprof_symbol_cache_lookup (SysprofSymbolCache *self, - SysprofAddress address); -void sysprof_symbol_cache_take (SysprofSymbolCache *self, - SysprofSymbol *symbol); +SysprofSymbolCache *sysprof_symbol_cache_new (void); +SysprofSymbol *sysprof_symbol_cache_lookup (SysprofSymbolCache *self, + SysprofAddress address); +void sysprof_symbol_cache_take (SysprofSymbolCache *self, + SysprofSymbol *symbol); +void sysprof_symbol_cache_populate_packed (SysprofSymbolCache 
*self, + GArray *array, + GByteArray *strings, + GHashTable *strings_offset, + int pid); G_END_DECLS diff --git a/src/libsysprof-analyze/sysprof-symbol-cache.c b/src/libsysprof-analyze/sysprof-symbol-cache.c index 0e92a1d5..5e9c97ef 100644 --- a/src/libsysprof-analyze/sysprof-symbol-cache.c +++ b/src/libsysprof-analyze/sysprof-symbol-cache.c @@ -26,6 +26,7 @@ static void sysprof_symbol_cache_node_augment (SysprofSymbolCacheNode *node); #include "tree.h" +#include "sysprof-bundled-symbolizer-private.h" #include "sysprof-symbol-private.h" #include "sysprof-symbol-cache-private.h" @@ -212,3 +213,50 @@ sysprof_symbol_cache_lookup (SysprofSymbolCache *self, return NULL; } +static guint +get_string (GByteArray *strings, + GHashTable *strings_offset, + const char *string) +{ + guint pos; + + if (string == NULL || string[0] == 0) + return 0; + + pos = GPOINTER_TO_UINT (g_hash_table_lookup (strings_offset, string)); + + if (pos == 0) + { + pos = strings->len; + g_byte_array_append (strings, (const guint8 *)string, strlen (string) + 1); + g_hash_table_insert (strings_offset, (char *)string, GUINT_TO_POINTER (pos)); + } + + return pos; +} + +void +sysprof_symbol_cache_populate_packed (SysprofSymbolCache *self, + GArray *array, + GByteArray *strings, + GHashTable *strings_offset, + int pid) +{ + SysprofSymbolCacheNode *node; + + g_return_if_fail (SYSPROF_IS_SYMBOL_CACHE (self)); + g_return_if_fail (array != NULL); + + RB_FOREACH(node, sysprof_symbol_cache, &self->head) { + SysprofPackedSymbol packed = {0}; /* zero-init: the padding field is copied into the array too */ + SysprofSymbol *symbol = node->symbol; + + packed.addr_begin = symbol->begin_address; + packed.addr_end = symbol->end_address; + packed.pid = pid; + packed.offset = get_string (strings, strings_offset, symbol->name); + packed.tag_offset = get_string (strings, strings_offset, symbol->binary_nick); + + g_array_append_val (array, packed); + } +}