From 81c384a9741c0bb3c2d4eaf8f7bd6d9b4e60d50f Mon Sep 17 00:00:00 2001
From: Christian Hergert
Date: Fri, 12 May 2023 14:13:40 -0700
Subject: [PATCH] libsysprof-analyze: use interval tree for symbol cache

This uses an augmented red-black tree to create an interval tree with
non-interval lookups. That amounts to storing address ranges within the
red-black tree, but looking up by single address.

Symbols that cannot be stored in the tree (an unusable address range, or
a begin address that is already cached) are released instead of leaked.
---
 src/libsysprof-analyze/sysprof-symbol-cache.c | 208 +++++++++++++-----
 src/libsysprof-analyze/tests/meson.build      |   4 +-
 .../tests/test-symbol-cache.c                 | 191 ++++++++++++++++
 3 files changed, 360 insertions(+), 43 deletions(-)
 create mode 100644 src/libsysprof-analyze/tests/test-symbol-cache.c

diff --git a/src/libsysprof-analyze/sysprof-symbol-cache.c b/src/libsysprof-analyze/sysprof-symbol-cache.c
index 37f4dd88..d4f7b866 100644
--- a/src/libsysprof-analyze/sysprof-symbol-cache.c
+++ b/src/libsysprof-analyze/sysprof-symbol-cache.c
@@ -20,23 +20,95 @@
 
 #include "config.h"
 
+typedef struct _SysprofSymbolCacheNode SysprofSymbolCacheNode;
+static void sysprof_symbol_cache_node_augment (SysprofSymbolCacheNode *node);
+#define RB_AUGMENT(elem) sysprof_symbol_cache_node_augment(elem)
+
+#include "tree.h"
+
 #include "sysprof-symbol-private.h"
 #include "sysprof-symbol-cache-private.h"
 
+/* A node of the interval tree. @low and @high are the inclusive bounds of
+ * the symbol's address range and @max is the largest @high stored in the
+ * subtree rooted at this node.
+ */
+struct _SysprofSymbolCacheNode
+{
+  RB_ENTRY(_SysprofSymbolCacheNode) link;
+  SysprofSymbol *symbol;
+  guint64 low;
+  guint64 high;
+  guint64 max;
+};
+
 struct _SysprofSymbolCache
 {
-  GObject    parent_instance;
-  GSequence *symbols;
+  GObject parent_instance;
+  RB_HEAD(sysprof_symbol_cache, _SysprofSymbolCacheNode) head;
 };
 
 G_DEFINE_FINAL_TYPE (SysprofSymbolCache, sysprof_symbol_cache, G_TYPE_OBJECT)
 
+static inline int
+sysprof_symbol_cache_node_compare (SysprofSymbolCacheNode *a,
+                                   SysprofSymbolCacheNode *b)
+{
+  if (a->low < b->low)
+    return -1;
+  else if (a->low > b->low)
+    return 1;
+  else
+    return 0;
+}
+
+RB_GENERATE_STATIC(sysprof_symbol_cache, _SysprofSymbolCacheNode, link, sysprof_symbol_cache_node_compare);
+
+/* Refreshes @node's max from its own upper bound and its children's max.
+ * tree.h invokes this through the RB_AUGMENT() hook defined above.
+ */
+static void
+sysprof_symbol_cache_node_augment (SysprofSymbolCacheNode *node)
+{
+  node->max = node->high;
+
+  if (RB_LEFT(node, link) && RB_LEFT(node, link)->max > node->max)
+    node->max = RB_LEFT(node, link)->max;
+
+  if (RB_RIGHT(node, link) && RB_RIGHT(node, link)->max > node->max)
+    node->max = RB_RIGHT(node, link)->max;
+}
+
+static void
+sysprof_symbol_cache_node_finalize (SysprofSymbolCacheNode *node)
+{
+  g_clear_object (&node->symbol);
+  g_free (node);
+}
+
+static void
+sysprof_symbol_cache_node_free (SysprofSymbolCacheNode *node)
+{
+  SysprofSymbolCacheNode *right = RB_RIGHT(node, link);
+  SysprofSymbolCacheNode *left = RB_LEFT(node, link);
+
+  if (left != NULL)
+    sysprof_symbol_cache_node_free (left);
+
+  sysprof_symbol_cache_node_finalize (node);
+
+  if (right != NULL)
+    sysprof_symbol_cache_node_free (right);
+}
+
 static void
 sysprof_symbol_cache_finalize (GObject *object)
 {
   SysprofSymbolCache *self = (SysprofSymbolCache *)object;
+  SysprofSymbolCacheNode *node = RB_ROOT(&self->head);
 
-  g_clear_pointer (&self->symbols, g_sequence_free);
+  if (node != NULL)
+    sysprof_symbol_cache_node_free (node);
 
   G_OBJECT_CLASS (sysprof_symbol_cache_parent_class)->finalize (object);
 }
@@ -52,7 +124,7 @@ sysprof_symbol_cache_class_init (SysprofSymbolCacheClass *klass)
 static void
 sysprof_symbol_cache_init (SysprofSymbolCache *self)
 {
-  self->symbols = g_sequence_new (g_object_unref);
+  RB_INIT (&self->head);
 }
 
 SysprofSymbolCache *
@@ -61,80 +133,134 @@ sysprof_symbol_cache_new (void)
   return g_object_new (SYSPROF_TYPE_SYMBOL_CACHE, NULL);
 }
 
-static int
-sysprof_symbol_cache_compare (gconstpointer a,
-                              gconstpointer b,
-                              gpointer      user_data)
+#if 0
+static void
+print_tree (SysprofSymbolCacheNode *node)
 {
-  const SysprofSymbol *sym_a = a;
-  const SysprofSymbol *sym_b = b;
+  SysprofSymbolCacheNode *left = RB_LEFT (node, link);
+  SysprofSymbolCacheNode *right = RB_RIGHT (node, link);
 
-  if (sym_a->begin_address < sym_b->begin_address)
-    return -1;
+  g_print ("[%lx:%lx max=%lx];\n", node->low, node->high, node->max);
 
-  if (sym_a->begin_address > sym_b->end_address)
-    return 1;
+  if (left)
+    {
+      g_print ("[%lx:%lx]'L -> [%lx:%lx];\n",
+               node->low, node->high,
+               left->low, left->high);
+      print_tree (left);
+    }
 
-  return 0;
+  if (right)
+    {
+      g_print ("[%lx:%lx]'R -> [%lx:%lx];\n",
+               node->low, node->high,
+               right->low, right->high);
+      print_tree (right);
+    }
 }
+#endif
 
 /**
  * sysprof_symbol_cache_take:
  * @self: a #SysprofSymbolCache
  * @symbol: (transfer full): a #SysprofSymbol
  *
+ * Inserts @symbol into the cache, keyed by its address range.
+ *
+ * The cache always consumes the caller's reference. A symbol that
+ * cannot be stored (zero begin or end address, an empty or inverted
+ * range, or a begin address that is already cached) is released
+ * immediately.
  */
 void
 sysprof_symbol_cache_take (SysprofSymbolCache *self,
                            SysprofSymbol      *symbol)
 {
+  SysprofSymbolCacheNode *node;
+  SysprofSymbolCacheNode *parent;
+
   g_return_if_fail (SYSPROF_IS_SYMBOL_CACHE (self));
   g_return_if_fail (SYSPROF_IS_SYMBOL (symbol));
 
-  if (symbol->begin_address == 0 || symbol->end_address == 0)
-    return;
+  /* Some symbols are not suitable for our interval tree */
+  if (symbol->begin_address == 0 ||
+      symbol->end_address == 0 ||
+      symbol->end_address <= symbol->begin_address)
+    {
+      g_object_unref (symbol);
+      return;
+    }
 
-  g_sequence_insert_sorted (self->symbols,
-                            g_object_ref (symbol),
-                            sysprof_symbol_cache_compare,
-                            NULL);
-}
+  node = g_new0 (SysprofSymbolCacheNode, 1);
+  node->symbol = symbol;
+  node->low = symbol->begin_address;
+  node->high = symbol->end_address-1;
+  node->max = node->high;
 
-static int
-sysprof_symbol_cache_lookup_func (gconstpointer a,
-                                  gconstpointer b,
-                                  gpointer      user_data)
-{
-  const SysprofSymbol *sym_a = a;
-  const gint64 *addr = b;
+  /* RB_INSERT() returns the already-present node when the key (low)
+   * collides, in which case @node was not linked into the tree.
+   */
+  if (RB_INSERT(sysprof_symbol_cache, &self->head, node) != NULL)
+    {
+      sysprof_symbol_cache_node_finalize (node);
+      return;
+    }
 
-  if (*addr < sym_a->begin_address)
-    return 1;
+  /* Raise max on every ancestor whose subtree now reaches further */
+  parent = RB_PARENT(node, link);
 
-  if (*addr > sym_a->end_address)
-    return -1;
+  while (parent != NULL)
+    {
+      if (node->max > parent->max)
+        parent->max = node->max;
+      node = parent;
+      parent = RB_PARENT(parent, link);
+    }
 
-  return 0;
+#if 0
+  g_print ("=====\n");
+  print_tree (RB_ROOT (&self->head));
+#endif
 }
 
+/**
+ * sysprof_symbol_cache_lookup:
+ * @self: a #SysprofSymbolCache
+ * @address: the address to resolve
+ *
+ * Finds the cached symbol whose address range contains @address.
+ *
+ * Returns: (transfer none) (nullable): a #SysprofSymbol or %NULL
+ */
 SysprofSymbol *
 sysprof_symbol_cache_lookup (SysprofSymbolCache *self,
                              SysprofAddress      address)
 {
-  GSequenceIter *iter;
+  SysprofSymbolCacheNode *node;
 
   g_return_val_if_fail (SYSPROF_IS_SYMBOL_CACHE (self), NULL);
 
   if (address == 0)
     return NULL;
 
-  iter = g_sequence_lookup (self->symbols,
-                            &address,
-                            sysprof_symbol_cache_lookup_func,
-                            NULL);
+  node = RB_ROOT(&self->head);
 
-  if (iter != NULL)
-    return g_sequence_get (iter);
+  while (node != NULL)
+    {
+      g_assert (RB_LEFT(node, link) == NULL ||
+                node->max >= RB_LEFT(node, link)->max);
+      g_assert (RB_RIGHT(node, link) == NULL ||
+                node->max >= RB_RIGHT(node, link)->max);
+
+      if (address >= node->low && address <= node->high)
+        return node->symbol;
+
+      /* Descend left only when that subtree's max can still reach @address */
+      if (RB_LEFT(node, link) && RB_LEFT(node, link)->max >= address)
+        node = RB_LEFT(node, link);
+      else
+        node = RB_RIGHT(node, link);
+    }
 
   return NULL;
 }
diff --git a/src/libsysprof-analyze/tests/meson.build b/src/libsysprof-analyze/tests/meson.build
index b1a045cc..1e161e3a 100644
--- a/src/libsysprof-analyze/tests/meson.build
+++ b/src/libsysprof-analyze/tests/meson.build
@@ -17,11 +17,11 @@ libsysprof_analyze_testsuite = {
   'test-print-file'     : {'skip': true},
   'test-list-processes' : {'skip': true},
   'test-symbolize'      : {'skip': true},
+  'test-symbol-cache'   : {},
 }
 
 libsysprof_analyze_testsuite_deps = [
-  libsysprof_analyze_dep,
-  libsysprof_capture_dep,
+  libsysprof_analyze_static_dep,
 ]
 
 foreach test, params: libsysprof_analyze_testsuite
diff --git a/src/libsysprof-analyze/tests/test-symbol-cache.c b/src/libsysprof-analyze/tests/test-symbol-cache.c
new file mode 100644
index 00000000..02956497
--- /dev/null
+++ b/src/libsysprof-analyze/tests/test-symbol-cache.c
@@ -0,0 +1,191 @@
+/* test-symbol-cache.c
+ *
+ * Copyright 2023 Christian Hergert
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-or-later
+ */
+
+#include <stdlib.h>
+
+#include <glib.h>
+
+#include "sysprof-symbol-private.h"
+#include "sysprof-symbol-cache-private.h"
+
+typedef struct _SymbolInfo
+{
+  const char *name;
+  guint64 begin;
+  guint64 end;
+  int position;
+  int sort;
+  SysprofSymbol *symbol;
+} SymbolInfo;
+
+static SysprofSymbol *
+create_symbol (const char *name,
+               guint64     begin,
+               guint64     end)
+{
+  g_assert (begin < end);
+
+  return _sysprof_symbol_new (g_ref_string_new (name), NULL, NULL, begin, end);
+}
+
+static int
+sort_by_key (gconstpointer a,
+             gconstpointer b)
+{
+  const SymbolInfo *info_a = a;
+  const SymbolInfo *info_b = b;
+
+  if (info_a->sort < info_b->sort)
+    return -1;
+  else if (info_a->sort > info_b->sort)
+    return 1;
+  else
+    return 0;
+}
+
+static int
+sort_by_position (gconstpointer a,
+                  gconstpointer b)
+{
+  const SymbolInfo *info_a = a;
+  const SymbolInfo *info_b = b;
+
+  if (info_a->position < info_b->position)
+    return -1;
+  else if (info_a->position > info_b->position)
+    return 1;
+  else
+    return 0;
+}
+
+static void
+test_interval_tree (void)
+{
+  SysprofSymbolCache *symbol_cache = sysprof_symbol_cache_new ();
+  SymbolInfo symbols[] = {
+    { "symbol1", 0x10000, 0x20000 },
+    { "symbol2", 0x20000, 0x30000 },
+    { "symbol3", 0x30000, 0x40000 },
+    { "symbol4", 0x90000, 0xa0000 },
+    { "symbol5", 0xb0000, 0xb0001 },
+    { "symbol6", 0xb0001, 0xb0002 },
+  };
+
+  /* Add some randomness on insertion */
+  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
+    {
+      symbols[i].position = i;
+      symbols[i].sort = g_random_int ();
+    }
+
+  /* Sort randomly for insertion */
+  qsort (symbols, G_N_ELEMENTS (symbols), sizeof (SymbolInfo), sort_by_key);
+  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
+    {
+      SymbolInfo *info = &symbols[i];
+
+      g_assert_cmpint (info->begin, <, info->end);
+
+      info->symbol = create_symbol (info->name, info->begin, info->end);
+
+      g_assert_nonnull (info->symbol);
+      g_assert_true (SYSPROF_IS_SYMBOL (info->symbol));
+
+      sysprof_symbol_cache_take (symbol_cache, g_object_ref (info->symbol));
+    }
+
+  /* Now resort to do lookups with edge checking */
+  qsort (symbols, G_N_ELEMENTS (symbols), sizeof (SymbolInfo), sort_by_position);
+  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
+    {
+      const SymbolInfo *info = &symbols[i];
+      const SymbolInfo *prev = i > 0 ? &symbols[i-1] : NULL;
+      const SymbolInfo *next = i + 1 < G_N_ELEMENTS (symbols) ? &symbols[i+1] : NULL;
+      SysprofSymbol *lookup;
+
+      g_assert_cmpint (info->position, ==, i);
+
+      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin-1);
+      if (prev && info->begin == prev->end)
+        g_assert_true (lookup == prev->symbol);
+      else
+        g_assert_null (lookup);
+
+      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin);
+      g_assert_nonnull (lookup);
+      g_assert_true (lookup == info->symbol);
+
+      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->end);
+      if (next == NULL || next->begin > info->end)
+        g_assert_null (lookup);
+      else
+        g_assert_true (lookup == next->symbol);
+
+      if (info->begin+1 != info->end)
+        {
+          lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin+1);
+          g_assert_nonnull (lookup);
+          g_assert_true (lookup == info->symbol);
+        }
+
+      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->end-1);
+      g_assert_nonnull (lookup);
+      g_assert_true (lookup == info->symbol);
+
+      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin + ((info->end-info->begin)/2));
+      g_assert_nonnull (lookup);
+      g_assert_true (lookup == info->symbol);
+    }
+
+  g_assert_finalize_object (symbol_cache);
+
+  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
+    g_assert_finalize_object (symbols[i].symbol);
+}
+
+static void
+test_colliding_symbol (void)
+{
+  SysprofSymbolCache *symbol_cache = sysprof_symbol_cache_new ();
+  SysprofSymbol *first = create_symbol ("first", 0x1000, 0x2000);
+  SysprofSymbol *collision = create_symbol ("collision", 0x1000, 0x3000);
+
+  sysprof_symbol_cache_take (symbol_cache, g_object_ref (first));
+  sysprof_symbol_cache_take (symbol_cache, g_object_ref (collision));
+
+  /* The cache must drop its reference to a symbol it could not store */
+  g_assert_finalize_object (collision);
+  g_assert_true (sysprof_symbol_cache_lookup (symbol_cache, 0x1000) == first);
+
+  g_assert_finalize_object (symbol_cache);
+  g_assert_finalize_object (first);
+}
+
+int
+main (int argc,
+      char *argv[])
+{
+  g_test_init (&argc, &argv, NULL);
+  g_test_add_func ("/libsysprof-analyze/SysprofSymbolCache/interval-tree",
+                   test_interval_tree);
+  g_test_add_func ("/libsysprof-analyze/SysprofSymbolCache/colliding-symbol",
+                   test_colliding_symbol);
+  return g_test_run ();
+}