libsysprof: add utility to build symbol maps

These are useful to allow us to append symbol information to a capture file
using the existing symbol resolvers.

It can read/write a small format embedded within capture files so that
we can append them from the target machine rather than decoding from the
machine we run Sysprof UI on.
This commit is contained in:
Christian Hergert
2019-05-28 19:04:54 -07:00
parent ffda366809
commit 7ffd3e41cf
4 changed files with 664 additions and 0 deletions

View File

@ -53,6 +53,7 @@ libsysprof_private_sources = [
'sysprof-kallsyms.c',
'sysprof-line-reader.c',
'sysprof-map-lookaside.c',
'sysprof-symbol-map.c',
ipc_service_src,
stackstash_sources,
helpers_sources,

View File

@ -0,0 +1,509 @@
/* sysprof-symbol-map.c
*
* Copyright 2019 Christian Hergert <chergert@redhat.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#define G_LOG_DOMAIN "sysprof-symbol-map"
#include "config.h"
#include <errno.h>
#include <unistd.h>
#include "sysprof-map-lookaside.h"
#include "sysprof-symbol-map.h"
/*
* Because we can't rely on the address ranges of symbols from ELF files
* or elsewhere, we have to duplicate a lot of entries when building this
* so that we can resolve all of the correct addresses.
*
* Decoded is the fixed-size record of the serialized symbol table. The
* table is an array of these records terminated by an all-zero record,
* followed by the NUL-terminated strings the records point at.
*/
/* NOTE(review): SYSPROF_ALIGNED_BEGIN/END(1) presumably request 1-byte
* alignment for the record; confirm against the macro definition.
*/
SYSPROF_ALIGNED_BEGIN(1)
typedef struct
{
/* First address covered by this record. */
SysprofCaptureAddress addr_begin;
/* Last address covered by this record (inclusive when searching). */
SysprofCaptureAddress addr_end;
/* Process the address range belongs to. */
guint32 pid;
/* Byte offset of the symbol name, relative to the start of the data. */
guint32 offset;
/* Byte offset of the tag string, or 0 when the symbol has no tag. */
guint32 tag_offset;
/* Always written as 0 when serializing. */
guint32 padding;
} Decoded
SYSPROF_ALIGNED_END(1);
/*
* A symbol map is used in one of two modes: building a map from a capture
* using symbol resolvers, or reading a previously serialized map.
*/
struct _SysprofSymbolMap
{
/* For creating maps */
/* Storage for the resolved symbol name strings. */
GStringChunk *chunk;
/* Hash table whose values are released with sysprof_map_lookaside_free(). */
GHashTable *lookasides;
/* Owned references to the SysprofSymbolResolver instances to consult. */
GPtrArray *resolvers;
/* Element pointers, one per unique (pid, address) that was resolved. */
GPtrArray *samples;
/* Set once sysprof_symbol_map_resolve() has been called. */
guint resolved : 1;
/* For reading maps */
/* Mapping of the serialized data; owns the memory the fields below use. */
GMappedFile *mapped;
/* Start of the Decoded record table inside the mapping. */
const Decoded *symbols;
/* Number of records before the all-zero terminator record. */
gsize n_symbols;
/* First byte of the mapping; record offsets are relative to this. */
const gchar *beginptr;
/* One past the last byte of the mapping. */
const gchar *endptr;
};
/*
* One resolved address collected while walking the samples of a capture.
* Hashing and equality only consider (pid, addr).
*/
typedef struct
{
/* Instruction address taken from the sample. */
SysprofCaptureAddress addr;
/* Symbol name; points into the map's GStringChunk. */
const gchar *name;
/* Tag reported by the resolver, or 0 when there is none. */
GQuark tag;
/* Process the sample was recorded for. */
guint32 pid;
} Element;
/* Release an Element that was allocated from the slice allocator. */
static void
element_free (Element *ele)
{
  g_slice_free1 (sizeof (Element), ele);
}
/*
 * Sort callback for a GPtrArray of Element pointers: order by pid first,
 * then by address. Both arguments point at array slots, hence the extra
 * level of indirection.
 */
static gint
element_compare (gconstpointer a,
                 gconstpointer b)
{
  const Element * const *lhs_slot = a;
  const Element * const *rhs_slot = b;
  const Element *lhs = *lhs_slot;
  const Element *rhs = *rhs_slot;

  if (lhs->pid != rhs->pid)
    return lhs->pid < rhs->pid ? -1 : 1;

  if (lhs->addr != rhs->addr)
    return lhs->addr < rhs->addr ? -1 : 1;

  return 0;
}
/*
 * Hash an Element by folding the two 32-bit halves of its address together
 * with the pid. The name and tag do not participate.
 */
static guint
element_hash (gconstpointer data)
{
  const Element *ele = data;
  guint32 halves[2];

  /* Copy rather than cast so the address can be split without aliasing. */
  memcpy (halves, &ele->addr, sizeof halves);

  return halves[0] ^ halves[1] ^ ele->pid;
}
/* Two Elements are equal when both their pid and their address match. */
static gboolean
element_equal (gconstpointer a,
               gconstpointer b)
{
  const Element *lhs = a;
  const Element *rhs = b;

  if (lhs->pid != rhs->pid)
    return FALSE;

  return lhs->addr == rhs->addr;
}
/*
 * sysprof_symbol_map_new:
 *
 * Allocates an empty symbol map with the containers needed for building a
 * map already created. Fields used for reading a map start out zeroed.
 *
 * Returns: a new SysprofSymbolMap; release with sysprof_symbol_map_free().
 */
SysprofSymbolMap *
sysprof_symbol_map_new (void)
{
  SysprofSymbolMap *map = g_slice_new0 (SysprofSymbolMap);

  /* Samples own their Element; resolvers hold a GObject reference. */
  map->samples = g_ptr_array_new_with_free_func ((GDestroyNotify) element_free);
  map->chunk = g_string_chunk_new (4096*16);
  map->resolvers = g_ptr_array_new_with_free_func (g_object_unref);
  map->lookasides = g_hash_table_new_full (NULL,
                                           NULL,
                                           NULL,
                                           (GDestroyNotify) sysprof_map_lookaside_free);

  return map;
}
/*
 * sysprof_symbol_map_free:
 * @self: the map to destroy
 *
 * Releases every container and mapping owned by @self and then the
 * structure itself. Members that are already NULL are skipped.
 */
void
sysprof_symbol_map_free (SysprofSymbolMap *self)
{
  if (self->lookasides != NULL)
    g_hash_table_unref (g_steal_pointer (&self->lookasides));

  if (self->resolvers != NULL)
    g_ptr_array_unref (g_steal_pointer (&self->resolvers));

  if (self->chunk != NULL)
    g_string_chunk_free (g_steal_pointer (&self->chunk));

  if (self->samples != NULL)
    g_ptr_array_unref (g_steal_pointer (&self->samples));

  if (self->mapped != NULL)
    g_mapped_file_unref (g_steal_pointer (&self->mapped));

  g_slice_free (SysprofSymbolMap, self);
}
/*
 * bsearch() comparator: @a is the lookup key (a single address stored in
 * both addr_begin and addr_end), @b is a record from the symbol table.
 * Records are ordered by pid and then by address range; a record matches
 * when the key address lies within [addr_begin, addr_end].
 */
static gint
search_for_symbol_cb (gconstpointer a,
                      gconstpointer b)
{
  const Decoded *key = a;
  const Decoded *ele = b;

  if (key->pid != ele->pid)
    return key->pid < ele->pid ? -1 : 1;

  g_assert (key->pid == ele->pid);

  if (key->addr_begin < ele->addr_begin)
    return -1;
  else if (key->addr_begin > ele->addr_end)
    return 1;

  g_assert (key->addr_begin >= ele->addr_begin);
  g_assert (key->addr_end <= ele->addr_end);

  return 0;
}
/*
 * sysprof_symbol_map_lookup:
 * @self: a map that has been loaded with sysprof_symbol_map_deserialize()
 * @time: time of the sample (currently unused by the lookup)
 * @pid: process the address belongs to
 * @addr: address to resolve
 * @tag: (out) (optional): location for the symbol's tag, set to 0 when the
 *   symbol has no tag or nothing was found
 *
 * Binary-searches the deserialized record table for the record of @pid
 * whose address range contains @addr.
 *
 * Returns: the symbol name, pointing into the mapped data owned by @self,
 *   or NULL if no symbol is known for the address.
 */
const gchar *
sysprof_symbol_map_lookup (SysprofSymbolMap      *self,
                           gint64                 time,
                           gint32                 pid,
                           SysprofCaptureAddress  addr,
                           GQuark                *tag)
{
  const Decoded *ret;
  const Decoded key = {
    .addr_begin = addr,
    .addr_end = addr,
    .pid = pid,
    .offset = 0,
    .tag_offset = 0,
  };
  gsize length;

  g_assert (self != NULL);

  if (tag != NULL)
    *tag = 0;

  /* Nothing was deserialized (or the table is empty): avoid handing a
   * NULL base pointer to bsearch().
   */
  if (self->symbols == NULL || self->n_symbols == 0)
    return NULL;

  ret = bsearch (&key,
                 self->symbols,
                 self->n_symbols,
                 sizeof *ret,
                 search_for_symbol_cb);

  /* Offset 0 is the start of the record table, never a string, so a
   * record without a name offset carries no usable symbol.
   */
  if (ret == NULL || ret->offset == 0)
    return NULL;

  length = self->endptr - self->beginptr;

  /* A tag offset of 0 means "no tag"; do not interpret the record table
   * bytes at offset 0 as a string.
   */
  if (tag != NULL && ret->tag_offset > 0 && ret->tag_offset < length)
    *tag = g_quark_from_string (&self->beginptr[ret->tag_offset]);

  if (ret->offset < length)
    return &self->beginptr[ret->offset];

  return NULL;
}
/*
 * sysprof_symbol_map_add_resolver:
 * @self: the map being built
 * @resolver: a symbol resolver to consult while resolving
 *
 * Appends @resolver to the list of resolvers. The map takes its own
 * reference; the caller keeps theirs.
 */
void
sysprof_symbol_map_add_resolver (SysprofSymbolMap      *self,
                                 SysprofSymbolResolver *resolver)
{
  SysprofSymbolResolver *owned;

  g_assert (self != NULL);
  g_assert (SYSPROF_IS_SYMBOL_RESOLVER (resolver));

  owned = g_object_ref (resolver);
  g_ptr_array_add (self->resolvers, owned);
}
static void
sysprof_symbol_map_do_sample (SysprofSymbolMap *self,
SysprofCaptureReader *reader,
GHashTable *seen)
{
SysprofAddressContext last_context = SYSPROF_ADDRESS_CONTEXT_NONE;
const SysprofCaptureSample *sample;
g_assert (self != NULL);
g_assert (reader != NULL);
g_assert (seen != NULL);
if (!(sample = sysprof_capture_reader_read_sample (reader)))
return;
for (guint i = 0; i < sample->n_addrs; i++)
{
SysprofCaptureAddress addr = sample->addrs[i];
SysprofAddressContext context;
if (sysprof_address_is_context_switch (addr, &context))
{
last_context = context;
continue;
}
for (guint j = 0; j < self->resolvers->len; j++)
{
SysprofSymbolResolver *resolver = g_ptr_array_index (self->resolvers, j);
g_autofree gchar *name = NULL;
const gchar *cname;
Element ele;
GQuark tag = 0;
name = sysprof_symbol_resolver_resolve_with_context (resolver,
sample->frame.time,
sample->frame.pid,
last_context,
addr,
&tag);
if (name == NULL)
continue;
cname = g_string_chunk_insert_const (self->chunk, name);
ele.addr = addr;
ele.pid = sample->frame.pid;
ele.name = cname;
ele.tag = tag;
if (!g_hash_table_contains (seen, &ele))
{
Element *cpy = g_slice_dup (Element, &ele);
g_hash_table_add (seen, cpy);
g_ptr_array_add (self->samples, cpy);
}
}
}
}
void
sysprof_symbol_map_resolve (SysprofSymbolMap *self,
SysprofCaptureReader *reader)
{
g_autoptr(GHashTable) seen = NULL;
SysprofCaptureFrameType type;
g_return_if_fail (self != NULL);
g_return_if_fail (self->resolved == FALSE);
g_return_if_fail (reader != NULL);
self->resolved = TRUE;
seen = g_hash_table_new (element_hash, element_equal);
sysprof_capture_reader_reset (reader);
for (guint i = 0; i < self->resolvers->len; i++)
{
sysprof_symbol_resolver_load (g_ptr_array_index (self->resolvers, i), reader);
sysprof_capture_reader_reset (reader);
}
while (sysprof_capture_reader_peek_type (reader, &type))
{
if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
{
sysprof_symbol_map_do_sample (self, reader, seen);
continue;
}
if (!sysprof_capture_reader_skip (reader))
break;
}
g_ptr_array_sort (self->samples, element_compare);
}
/*
 * sysprof_symbol_map_printf:
 * @self: a map that still holds its resolved samples
 *
 * Debugging aid: prints one line per collected sample with pid, address,
 * symbol name and, when present, the tag in brackets.
 */
void
sysprof_symbol_map_printf (SysprofSymbolMap *self)
{
  g_return_if_fail (self != NULL);
  g_return_if_fail (self->samples != NULL);

  for (guint idx = 0; idx < self->samples->len; idx++)
    {
      const Element *entry = g_ptr_array_index (self->samples, idx);

      if (entry->tag == 0)
        {
          g_print ("%-5d: %p: %s\n", entry->pid, (gpointer)entry->addr, entry->name);
          continue;
        }

      g_print ("%-5d: %p: %s [%s]\n", entry->pid, (gpointer)entry->addr, entry->name, g_quark_to_string (entry->tag));
    }
}
/*
 * Return the offset of @str inside the string buffer @ar, appending it
 * (including the trailing NUL) the first time it is requested. @seen maps
 * string pointers to offsets, so callers must pass interned strings for
 * de-duplication to work.
 */
static guint
get_string_offset (GByteArray  *ar,
                   GHashTable  *seen,
                   const gchar *str)
{
  gpointer position = NULL;

  if G_LIKELY (g_hash_table_lookup_extended (seen, str, NULL, &position))
    return GPOINTER_TO_UINT (position);

  /* Record where the string will start before growing the buffer. */
  position = GUINT_TO_POINTER (ar->len);
  g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1);
  g_hash_table_insert (seen, (gpointer)str, position);

  return GPOINTER_TO_UINT (position);
}
gboolean
sysprof_symbol_map_serialize (SysprofSymbolMap *self,
gint fd)
{
static const Decoded empty = {0};
SysprofCaptureAddress begin = 0;
g_autoptr(GByteArray) ar = NULL;
g_autoptr(GHashTable) seen = NULL;
g_autoptr(GArray) decoded = NULL;
gsize offset;
g_assert (self != NULL);
g_assert (fd != -1);
ar = g_byte_array_new ();
seen = g_hash_table_new (NULL, NULL);
decoded = g_array_new (FALSE, FALSE, sizeof (Decoded));
/* Add some empty space to both give us non-zero offsets and also ensure
* empty space between data.
*/
g_byte_array_append (ar, (guint8 *)&empty, sizeof empty);
for (guint i = 0; i < self->samples->len; i++)
{
Element *ele = g_ptr_array_index (self->samples, i);
if (!g_hash_table_contains (seen, ele->name))
{
const gchar *str = ele->name;
gpointer ptr = GUINT_TO_POINTER (ar->len);
g_byte_array_append (ar, (guint8 *)str, strlen (str) + 1);
g_hash_table_insert (seen, (gpointer)str, ptr);
}
}
for (guint i = 0; i < self->samples->len; i++)
{
Element *ele = g_ptr_array_index (self->samples, i);
Decoded dec;
if (begin == 0)
begin = ele->addr;
if ((i + 1) < self->samples->len)
{
Element *next = g_ptr_array_index (self->samples, i + 1);
if (ele->pid == next->pid && ele->name == next->name)
continue;
}
dec.padding = 0;
dec.addr_begin = begin;
dec.addr_end = ele->addr;
dec.pid = ele->pid;
dec.offset = get_string_offset (ar, seen, ele->name);
if (ele->tag)
dec.tag_offset = get_string_offset (ar, seen, g_quark_to_string (ele->tag));
else
dec.tag_offset = 0;
g_array_append_val (decoded, dec);
begin = 0;
}
offset = sizeof empty * decoded->len;
for (guint i = 0; i < decoded->len; i++)
{
Decoded *dec = &g_array_index (decoded, Decoded, i);
if (dec->offset)
dec->offset += offset;
if (dec->tag_offset)
dec->tag_offset += offset;
}
if (write (fd, decoded->data, offset) != offset)
return FALSE;
if (write (fd, ar->data, ar->len) != ar->len)
return FALSE;
/* Aggressively release state now that we're finished */
if (self->samples->len)
g_ptr_array_remove_range (self->samples, 0, self->samples->len);
if (self->resolvers != NULL)
g_ptr_array_remove_range (self->resolvers, 0, self->resolvers->len);
g_string_chunk_clear (self->chunk);
g_hash_table_remove_all (self->lookasides);
return TRUE;
}
/*
 * sysprof_symbol_map_deserialize:
 * @self: a map that has not been deserialized into before
 * @byte_order: byte order the data was written in (compared against
 *   G_BYTE_ORDER to decide whether records need swapping)
 * @fd: file descriptor containing data written by
 *   sysprof_symbol_map_serialize()
 *
 * Maps @fd and locates the record table, which ends at the first all-zero
 * record. Records are byte-swapped in the mapping when @byte_order differs
 * from the host. If no terminator record is found the map stays empty and
 * lookups return NULL.
 *
 * Returns: TRUE if the file could be mapped, otherwise FALSE.
 */
gboolean
sysprof_symbol_map_deserialize (SysprofSymbolMap *self,
                                gint              byte_order,
                                gint              fd)
{
  gboolean needs_swap = byte_order != G_BYTE_ORDER;
  gchar *beginptr;
  gchar *endptr;
  gsize length;

  g_return_val_if_fail (self != NULL, FALSE);
  g_return_val_if_fail (self->mapped == NULL, FALSE);

  /* Mapped writable so records can be swapped and the data terminated. */
  if (!(self->mapped = g_mapped_file_new_from_fd (fd, TRUE, NULL)))
    return FALSE;

  beginptr = g_mapped_file_get_contents (self->mapped);
  length = g_mapped_file_get_length (self->mapped);

  if (beginptr != NULL && length > 0)
    {
      endptr = beginptr + length;

      /* The data may be truncated or hostile: force a trailing NUL so
       * that any in-range string offset yields a terminated string.
       */
      endptr[-1] = '\0';
    }
  else
    {
      /* Empty file: no contents to walk. */
      beginptr = NULL;
      endptr = NULL;
    }

  for (gchar *ptr = beginptr;
       ptr != NULL && (gsize)(endptr - ptr) >= sizeof (Decoded);
       ptr += sizeof (Decoded))
    {
      Decoded *sym = (Decoded *)ptr;

      /* The all-zero record marks the end of the table; zero is the same
       * in either byte order so it can be tested before swapping.
       */
      if (sym->addr_begin == 0 &&
          sym->addr_end == 0 &&
          sym->pid == 0 &&
          sym->offset == 0)
        {
          self->symbols = (const Decoded *)beginptr;
          self->n_symbols = sym - self->symbols;
          break;
        }
      else if (needs_swap)
        {
          sym->addr_begin = GUINT64_SWAP_LE_BE (sym->addr_begin);
          sym->addr_end = GUINT64_SWAP_LE_BE (sym->addr_end);
          sym->pid = GUINT32_SWAP_LE_BE (sym->pid);
          sym->offset = GUINT32_SWAP_LE_BE (sym->offset);
          sym->tag_offset = GUINT32_SWAP_LE_BE (sym->tag_offset);
        }
    }

  self->beginptr = beginptr;
  self->endptr = endptr;

  return TRUE;
}

View File

@ -0,0 +1,49 @@
/* sysprof-symbol-map.h
*
* Copyright 2019 Christian Hergert <chergert@redhat.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: GPL-3.0-or-later
*/
#pragma once
#include <sysprof-capture.h>
#include "sysprof-symbol-resolver.h"
G_BEGIN_DECLS
/* Opaque symbol map; either built from a capture or loaded from a file. */
typedef struct _SysprofSymbolMap SysprofSymbolMap;
/* Creates an empty map. Release it with sysprof_symbol_map_free(). */
SysprofSymbolMap *sysprof_symbol_map_new (void);
/* Adds @resolver to the map; the map takes its own reference. */
void sysprof_symbol_map_add_resolver (SysprofSymbolMap *self,
SysprofSymbolResolver *resolver);
/* Resolves the addresses of all samples in @reader using the added
* resolvers. May only be called once per map; resets @reader.
*/
void sysprof_symbol_map_resolve (SysprofSymbolMap *self,
SysprofCaptureReader *reader);
/* Looks up the symbol name for @addr in @pid within a deserialized map.
* Returns NULL when nothing matches. @tag may be NULL; otherwise it is
* reset to 0 before the lookup and set when the symbol has a tag.
*/
const gchar *sysprof_symbol_map_lookup (SysprofSymbolMap *self,
gint64 time,
gint32 pid,
SysprofCaptureAddress addr,
GQuark *tag);
/* Prints the resolved samples with g_print(); intended for debugging. */
void sysprof_symbol_map_printf (SysprofSymbolMap *self);
/* Writes the resolved map to @fd. Returns FALSE if writing fails. On
* success the samples and resolvers held by the map are released.
*/
gboolean sysprof_symbol_map_serialize (SysprofSymbolMap *self,
gint fd);
/* Maps the serialized data in @fd for use with sysprof_symbol_map_lookup().
* @byte_order is the byte order the data was written in. Returns FALSE if
* the file cannot be mapped.
*/
gboolean sysprof_symbol_map_deserialize (SysprofSymbolMap *self,
gint byte_order,
gint fd);
/* Frees the map and everything it owns. */
void sysprof_symbol_map_free (SysprofSymbolMap *self);
G_END_DECLS

105
src/tests/test-addr-map.c Normal file
View File

@ -0,0 +1,105 @@
#include <fcntl.h>
#include <sysprof.h>
#include "sysprof-platform.h"
#include "sysprof-symbol-map.h"
static GMainLoop *main_loop;
/*
 * Thread entry point for the round-trip test. Builds a symbol map from the
 * capture in @data (a SysprofCaptureReader), serializes it to a memfd,
 * reads it back into a fresh map and prints the symbol name of every
 * address of every sample. Quits the main loop when done.
 */
static void *
resolve_in_thread (gpointer data)
{
  SysprofCaptureReader *reader = data;
  g_autoptr(SysprofSymbolResolver) kernel = NULL;
  g_autoptr(SysprofSymbolResolver) elf = NULL;
  SysprofCaptureFrameType type;
  SysprofSymbolMap *map;
  gint64 pos;
  gboolean r;
  int fd;

  g_assert (reader != NULL);

  map = sysprof_symbol_map_new ();
  kernel = sysprof_kernel_symbol_resolver_new ();
  elf = sysprof_elf_symbol_resolver_new ();

  sysprof_symbol_map_add_resolver (map, kernel);
  sysprof_symbol_map_add_resolver (map, elf);

  sysprof_symbol_map_resolve (map, reader);

  fd = sysprof_memfd_create ("decode-test");
  g_assert_cmpint (fd, !=, -1);

  r = sysprof_symbol_map_serialize (map, fd);
  g_assert_true (r);
  sysprof_symbol_map_free (map);

  /* Reset some state */
  sysprof_capture_reader_reset (reader);

  /* lseek() takes the offset before the whence argument. */
  pos = lseek (fd, 0, SEEK_SET);
  g_assert_cmpint (pos, ==, 0);

  /* Now deserialize it */
  map = sysprof_symbol_map_new ();
  r = sysprof_symbol_map_deserialize (map, G_BYTE_ORDER, fd);
  g_assert_true (r);

  /* Now try to print some stack traces */
  while (sysprof_capture_reader_peek_type (reader, &type))
    {
      if (type == SYSPROF_CAPTURE_FRAME_SAMPLE)
        {
          const SysprofCaptureSample *sample = NULL;

          if (!(sample = sysprof_capture_reader_read_sample (reader)))
            break;

          for (guint j = 0; j < sample->n_addrs; j++)
            {
              const gchar *name;
              GQuark tag;

              if (!(name = sysprof_symbol_map_lookup (map, sample->frame.time, sample->frame.pid, sample->addrs[j], &tag)))
                name = "Unknown symbol";

              g_print ("%u: %s\n", j, name);
            }

          g_print ("======\n");
        }
      else if (!sysprof_capture_reader_skip (reader))
        break;
    }

  sysprof_symbol_map_free (map);

  close (fd);

  g_main_loop_quit (main_loop);

  return NULL;
}
/*
 * Test driver: opens the capture file named on the command line and runs
 * the symbol-map round trip on a worker thread while the main thread spins
 * a main loop until the worker quits it.
 *
 * Returns 0 on success, 1 on bad usage or if the capture cannot be opened.
 */
gint
main (gint   argc,
      gchar *argv[])
{
  g_autoptr(SysprofCaptureReader) reader = NULL;
  g_autoptr(GError) error = NULL;
  GThread *thread;

  if (argc != 2)
    {
      g_printerr ("usage: %s CAPTURE_FILE\n", argv[0]);
      return 1;
    }

  if (!(reader = sysprof_capture_reader_new (argv[1], &error)))
    {
      g_printerr ("%s\n", error->message);
      return 1;
    }

  main_loop = g_main_loop_new (NULL, FALSE);
  thread = g_thread_new ("reader-thread", resolve_in_thread, reader);
  g_main_loop_run (main_loop);

  /* Wait for the worker before @reader is released on return; joining
   * also drops the thread reference returned by g_thread_new().
   */
  g_thread_join (thread);
  g_clear_pointer (&main_loop, g_main_loop_unref);

  return 0;
}