libsysprof-analyze: use interval tree for symbol cache

This uses an augmented red-black tree to create an interval tree with
non-interval lookups. That amounts to storing address ranges within the
red-black tree, but looking up by single address.
This commit is contained in:
Christian Hergert
2023-05-12 14:13:40 -07:00
parent 379db77349
commit 81c384a974
3 changed files with 310 additions and 49 deletions

View File

@ -20,23 +20,88 @@
#include "config.h"
typedef struct _SysprofSymbolCacheNode SysprofSymbolCacheNode;
static void sysprof_symbol_cache_node_augment (SysprofSymbolCacheNode *node);
#define RB_AUGMENT(elem) sysprof_symbol_cache_node_augment(elem)
#include "tree.h"
#include "sysprof-symbol-private.h"
#include "sysprof-symbol-cache-private.h"
/* One address range stored in the symbol cache's red-black tree. */
struct _SysprofSymbolCacheNode
{
/* Tree linkage used by the tree.h RB_* macros */
RB_ENTRY(_SysprofSymbolCacheNode) link;
/* Symbol for this range; unreferenced when the node is finalized */
SysprofSymbol *symbol;
/* First address of the range; the key the node comparator orders by */
guint64 low;
/* Last address of the range (inclusive) */
guint64 high;
/* Largest of this node's `high` and its children's `max` values */
guint64 max;
};
/*
 * Instance structure of the symbol cache object.
 *
 * NOTE(review): parent_instance is listed twice and both a GSequence member
 * and an RB_HEAD member appear; this looks like removed and added diff lines
 * shown together. Confirm which members exist in the applied file.
 */
struct _SysprofSymbolCache
{
GObject parent_instance;
GSequence *symbols;
GObject parent_instance;
/* Root of the red-black tree of SysprofSymbolCacheNode */
RB_HEAD(sysprof_symbol_cache, _SysprofSymbolCacheNode) head;
};
G_DEFINE_FINAL_TYPE (SysprofSymbolCache, sysprof_symbol_cache, G_TYPE_OBJECT)
/*
 * Tree ordering function: compares two nodes by the first address of
 * their range only.
 *
 * Returns -1 when @a starts before @b, 1 when it starts after, and 0
 * when both start at the same address.
 */
static inline int
sysprof_symbol_cache_node_compare (SysprofSymbolCacheNode *a,
                                   SysprofSymbolCacheNode *b)
{
  /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
  return (a->low > b->low) - (a->low < b->low);
}
RB_GENERATE_STATIC(sysprof_symbol_cache, _SysprofSymbolCacheNode, link, sysprof_symbol_cache_node_compare);
/*
 * RB_AUGMENT hook: recomputes @node's `max` from its own `high` and the
 * `max` already stored in whichever children it currently has.
 */
static void
sysprof_symbol_cache_node_augment (SysprofSymbolCacheNode *node)
{
  SysprofSymbolCacheNode *lchild = RB_LEFT(node, link);
  SysprofSymbolCacheNode *rchild = RB_RIGHT(node, link);
  guint64 largest = node->high;

  if (lchild != NULL && lchild->max > largest)
    largest = lchild->max;

  if (rchild != NULL && rchild->max > largest)
    largest = rchild->max;

  node->max = largest;
}
/*
 * Releases a single node: drops the node's reference on its symbol (if
 * any) and frees the node itself. Children are not touched.
 */
static void
sysprof_symbol_cache_node_finalize (SysprofSymbolCacheNode *node)
{
  g_clear_pointer (&node->symbol, g_object_unref);
  g_free (node);
}
/*
 * Frees @node and everything below it.
 *
 * Nodes are finalized in order: the left subtree first, then the node,
 * then the right subtree. The left side is handled by recursion and the
 * right side by continuing the loop.
 */
static void
sysprof_symbol_cache_node_free (SysprofSymbolCacheNode *node)
{
  do
    {
      /* Read both children before the node is freed */
      SysprofSymbolCacheNode *next = RB_RIGHT(node, link);
      SysprofSymbolCacheNode *lesser = RB_LEFT(node, link);

      if (lesser != NULL)
        sysprof_symbol_cache_node_free (lesser);

      sysprof_symbol_cache_node_finalize (node);

      node = next;
    }
  while (node != NULL);
}
/*
 * GObject finalize handler for the cache: frees the tree starting at its
 * root node (when there is one) and then chains up to the parent class.
 */
static void
sysprof_symbol_cache_finalize (GObject *object)
{
SysprofSymbolCache *self = (SysprofSymbolCache *)object;
SysprofSymbolCacheNode *node = RB_ROOT(&self->head);
/* NOTE(review): this g_sequence_free line looks like a removed diff line
 * shown next to its tree-based replacement below; confirm. */
g_clear_pointer (&self->symbols, g_sequence_free);
/* An empty tree has a NULL root and node_free is not called on it */
if (node != NULL)
sysprof_symbol_cache_node_free (node);
G_OBJECT_CLASS (sysprof_symbol_cache_parent_class)->finalize (object);
}
@ -52,7 +117,7 @@ sysprof_symbol_cache_class_init (SysprofSymbolCacheClass *klass)
/* Instance initializer: sets up the (empty) symbol storage. */
static void
sysprof_symbol_cache_init (SysprofSymbolCache *self)
{
/* NOTE(review): the g_sequence_new line looks like a removed diff line
 * shown next to its RB_INIT replacement; confirm. */
self->symbols = g_sequence_new (g_object_unref);
RB_INIT (&self->head);
}
SysprofSymbolCache *
@ -61,80 +126,105 @@ sysprof_symbol_cache_new (void)
return g_object_new (SYSPROF_TYPE_SYMBOL_CACHE, NULL);
}
static int
sysprof_symbol_cache_compare (gconstpointer a,
gconstpointer b,
gpointer user_data)
#if 0
/*
 * Debugging aid, compiled out by the surrounding #if 0. Prints @node's
 * range and max, then for each child prints an edge line and recurses
 * into that child.
 *
 * NOTE(review): "%lx" is used with guint64 arguments; where long is
 * narrower than 64 bits a different format modifier would be needed.
 * Confirm before enabling.
 *
 * NOTE(review): lines of a sysprof_symbol_cache_compare() function are
 * mixed into this span; they look like removed diff lines. Confirm.
 */
static void
print_tree (SysprofSymbolCacheNode *node)
{
const SysprofSymbol *sym_a = a;
const SysprofSymbol *sym_b = b;
SysprofSymbolCacheNode *left = RB_LEFT (node, link);
SysprofSymbolCacheNode *right = RB_RIGHT (node, link);
if (sym_a->begin_address < sym_b->begin_address)
return -1;
g_print ("[%lx:%lx max=%lx];\n", node->low, node->high, node->max);
if (sym_a->begin_address > sym_b->end_address)
return 1;
if (left)
{
g_print ("[%lx:%lx]'L -> [%lx:%lx];\n",
node->low, node->high,
left->low, left->high);
print_tree (left);
}
return 0;
if (right)
{
g_print ("[%lx:%lx]'R -> [%lx:%lx];\n",
node->low, node->high,
right->low, right->high);
print_tree (right);
}
}
#endif
/**
 * sysprof_symbol_cache_take:
 * @self: a #SysprofSymbolCache
 * @symbol: (transfer full): a #SysprofSymbol
 *
 * Stores @symbol in the interval tree as the inclusive address range
 * [begin_address, end_address - 1].
 *
 * In the tree-based code below, a symbol whose begin or end address is 0,
 * or whose begin and end addresses are equal, is unreferenced and not
 * stored.
 */
void
sysprof_symbol_cache_take (SysprofSymbolCache *self,
SysprofSymbol *symbol)
{
SysprofSymbolCacheNode *node;
SysprofSymbolCacheNode *parent;
g_return_if_fail (SYSPROF_IS_SYMBOL_CACHE (self));
g_return_if_fail (SYSPROF_IS_SYMBOL (symbol));
g_return_if_fail (symbol->end_address > symbol->begin_address);
/* NOTE(review): this early return (which does not unref) and the
 * g_sequence_insert_sorted() call further down look like removed diff
 * lines shown alongside their replacements; confirm. */
if (symbol->begin_address == 0 || symbol->end_address == 0)
return;
/* Some symbols are not suitable for our interval tree */
if (symbol->begin_address == 0 ||
symbol->end_address == 0 ||
symbol->begin_address == symbol->end_address)
{
g_object_unref (symbol);
return;
}
g_sequence_insert_sorted (self->symbols,
g_object_ref (symbol),
sysprof_symbol_cache_compare,
NULL);
}
/* The node takes over the caller's reference; high is stored inclusive */
node = g_new0 (SysprofSymbolCacheNode, 1);
node->symbol = symbol;
node->low = symbol->begin_address;
node->high = symbol->end_address-1;
node->max = node->high;
/* NOTE(review): the sysprof_symbol_cache_lookup_func() lines that follow
 * look like removed diff lines interleaved with this function; confirm. */
static int
sysprof_symbol_cache_lookup_func (gconstpointer a,
gconstpointer b,
gpointer user_data)
{
const SysprofSymbol *sym_a = a;
const gint64 *addr = b;
/* NOTE(review): the result of RB_INSERT is not checked. If tree.h reports
 * a key collision (equal `low`) by returning the existing element without
 * linking the new one, this node and its symbol would never be freed.
 * Confirm against tree.h. */
RB_INSERT(sysprof_symbol_cache, &self->head, node);
if (*addr < sym_a->begin_address)
return 1;
parent = RB_PARENT(node, link);
if (*addr > sym_a->end_address)
return -1;
/* Walk up to the root, raising each ancestor's max when the value carried
 * up from below is larger */
while (parent != NULL)
{
if (node->max > parent->max)
parent->max = node->max;
node = parent;
parent = RB_PARENT(parent, link);
}
return 0;
#if 0
g_print ("=====\n");
print_tree (RB_ROOT (&self->head));
#endif
}
/*
 * Looks up the symbol whose stored range contains @address.
 *
 * Returns NULL when @address is 0 or when the descent runs out of nodes
 * without finding a containing range. On a hit the node's own symbol
 * pointer is returned; no reference is added here.
 *
 * NOTE(review): the GSequenceIter declaration and the g_sequence_lookup()
 * / g_sequence_get() lines look like removed diff lines shown alongside
 * the tree-based replacement; confirm.
 */
SysprofSymbol *
sysprof_symbol_cache_lookup (SysprofSymbolCache *self,
SysprofAddress address)
{
GSequenceIter *iter;
SysprofSymbolCacheNode *node;
g_return_val_if_fail (SYSPROF_IS_SYMBOL_CACHE (self), NULL);
if (address == 0)
return NULL;
iter = g_sequence_lookup (self->symbols,
&address,
sysprof_symbol_cache_lookup_func,
NULL);
node = RB_ROOT(&self->head);
if (iter != NULL)
return g_sequence_get (iter);
while (node != NULL)
{
/* Sanity checks: a node's max must not be smaller than a child's max */
g_assert (RB_LEFT(node, link) == NULL ||
node->max >= RB_LEFT(node, link)->max);
g_assert (RB_RIGHT(node, link) == NULL ||
node->max >= RB_RIGHT(node, link)->max);
/* Both bounds are inclusive */
if (address >= node->low && address <= node->high)
return node->symbol;
/* Go left only when the left subtree's max reaches address; otherwise
 * continue to the right */
if (RB_LEFT(node, link) && RB_LEFT(node, link)->max >= address)
node = RB_LEFT(node, link);
else
node = RB_RIGHT(node, link);
}
return NULL;
}

View File

@ -17,11 +17,11 @@ libsysprof_analyze_testsuite = {
'test-print-file' : {'skip': true},
'test-list-processes' : {'skip': true},
'test-symbolize' : {'skip': true},
'test-symbol-cache' : {},
}
libsysprof_analyze_testsuite_deps = [
libsysprof_analyze_dep,
libsysprof_capture_dep,
libsysprof_analyze_static_dep,
]
foreach test, params: libsysprof_analyze_testsuite

View File

@ -0,0 +1,171 @@
/* test-symbol-cache.c
*
* Copyright 2023 Christian Hergert <chergert@redhat.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#include <stdlib.h>
#include <sysprof-analyze.h>
#include "sysprof-symbol-private.h"
#include "sysprof-symbol-cache-private.h"
/* Test fixture describing one symbol and its bookkeeping for the test. */
typedef struct _SymbolInfo
{
/* Name passed to create_symbol() */
const char *name;
/* First address of the symbol */
guint64 begin;
/* End address; the test expects lookups at `end` to miss this symbol */
guint64 end;
/* Index in the original table, used to restore address order */
int position;
/* Random key used to shuffle the insertion order */
int sort;
/* Symbol created for this entry; the test keeps its own reference */
SysprofSymbol *symbol;
} SymbolInfo;
/*
 * Builds a symbol named @name with the given @begin and @end addresses.
 * Asserts that @begin is below @end before creating anything.
 */
static SysprofSymbol *
create_symbol (const char *name,
               guint64     begin,
               guint64     end)
{
  char *symbol_name;

  g_assert (begin < end);

  symbol_name = g_ref_string_new (name);

  return _sysprof_symbol_new (symbol_name, NULL, NULL, begin, end);
}
/*
 * qsort() comparator ordering SymbolInfo entries by their `sort` key.
 * Returns -1, 0 or 1.
 */
static int
sort_by_key (gconstpointer a,
             gconstpointer b)
{
  int lhs = ((const SymbolInfo *)a)->sort;
  int rhs = ((const SymbolInfo *)b)->sort;

  return (lhs > rhs) - (lhs < rhs);
}
/*
 * qsort() comparator ordering SymbolInfo entries by their `position`
 * field. Returns -1, 0 or 1.
 */
static int
sort_by_position (gconstpointer a,
                  gconstpointer b)
{
  int lhs = ((const SymbolInfo *)a)->position;
  int rhs = ((const SymbolInfo *)b)->position;

  return (lhs > rhs) - (lhs < rhs);
}
/*
 * Inserts a fixed table of symbols into a SysprofSymbolCache in a shuffled
 * order, then checks lookups at and around every range boundary:
 * begin-1, begin, begin+1, the midpoint, end-1 and end.
 *
 * Finally asserts that dropping the test's references finalizes the cache
 * and each symbol.
 */
static void
test_interval_tree (void)
{
  SysprofSymbolCache *symbol_cache = sysprof_symbol_cache_new ();
  SymbolInfo symbols[] = {
    { "symbol1", 0x10000, 0x20000 },
    { "symbol2", 0x20000, 0x30000 },
    { "symbol3", 0x30000, 0x40000 },
    { "symbol4", 0x90000, 0xa0000 },
    { "symbol5", 0xb0000, 0xb0001 },
    { "symbol6", 0xb0001, 0xb0002 },
  };

  /* Add some randomness on insertion. The keys come from the test
   * framework's seeded generator so that a failing insertion order can
   * be replayed by running the test with the same --seed.
   */
  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
    {
      symbols[i].position = (int)i;
      symbols[i].sort = g_test_rand_int ();
    }

  /* Sort randomly for insertion */
  qsort (symbols, G_N_ELEMENTS (symbols), sizeof (SymbolInfo), sort_by_key);

  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
    {
      SymbolInfo *info = &symbols[i];

      /* Addresses are unsigned 64-bit, so compare them as such */
      g_assert_cmpuint (info->begin, <, info->end);

      info->symbol = create_symbol (info->name, info->begin, info->end);

      g_assert_nonnull (info->symbol);
      g_assert_true (SYSPROF_IS_SYMBOL (info->symbol));

      /* The cache takes one reference; the test keeps its own */
      sysprof_symbol_cache_take (symbol_cache, g_object_ref (info->symbol));
    }

  /* Now resort to do lookups with edge checking */
  qsort (symbols, G_N_ELEMENTS (symbols), sizeof (SymbolInfo), sort_by_position);

  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
    {
      const SymbolInfo *info = &symbols[i];
      const SymbolInfo *prev = i > 0 ? &symbols[i-1] : NULL;
      const SymbolInfo *next = i + 1 < G_N_ELEMENTS (symbols) ? &symbols[i+1] : NULL;
      SysprofSymbol *lookup;

      g_assert_cmpint (info->position, ==, (int)i);

      /* One below the range: belongs to the previous symbol only when
       * the two ranges are adjacent.
       */
      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin-1);
      if (prev && info->begin == prev->end)
        g_assert_true (lookup == prev->symbol);
      else
        g_assert_null (lookup);

      /* First address of the range */
      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin);
      g_assert_nonnull (lookup);
      g_assert_true (lookup == info->symbol);

      /* The end address itself is outside the range: it either misses
       * or lands in the next symbol.
       */
      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->end);
      if (next == NULL || next->begin > info->end)
        g_assert_null (lookup);
      else
        g_assert_true (lookup == next->symbol);

      /* begin+1 is only inside the range when it is wider than one */
      if (info->begin+1 != info->end)
        {
          lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin+1);
          g_assert_nonnull (lookup);
          g_assert_true (lookup == info->symbol);
        }

      /* Last address inside the range */
      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->end-1);
      g_assert_nonnull (lookup);
      g_assert_true (lookup == info->symbol);

      /* Midpoint of the range */
      lookup = sysprof_symbol_cache_lookup (symbol_cache, info->begin + ((info->end-info->begin)/2));
      g_assert_nonnull (lookup);
      g_assert_true (lookup == info->symbol);
    }

  g_assert_finalize_object (symbol_cache);

  for (guint i = 0; i < G_N_ELEMENTS (symbols); i++)
    g_assert_finalize_object (symbols[i].symbol);
}
/*
 * Test program entry point: initializes the GLib test framework,
 * registers the interval-tree test and returns the result of the run.
 */
int
main (int   argc,
      char *argv[])
{
  int status;

  g_test_init (&argc, &argv, NULL);

  g_test_add_func ("/libsysprof-analyze/SysprofSymbolCache/interval-tree",
                   test_interval_tree);

  status = g_test_run ();

  return status;
}