Files
sysprof/tracker.c
Damien Lespiau d1f73304eb tracker: Fix the map look up
A few commits ago, an attempt was made to speed up the map lookup.
Unfortunately, it missed the case where you actually hit the speed-up
(once the map is the first element of the array, it is never
returned).

So, make sure that if i is 0, you return the first element of the array,
while still doing the array reordering if i > 0.
2011-07-15 12:30:48 -04:00

1129 lines
22 KiB
C

#include <glib.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <glib/gprintf.h>
#include "tracker.h"
#include "stackstash.h"
#include "binfile.h"
#include "elfparser.h"
#include "perf_counter.h"
/* Short names for the event record structs stored in a tracker's
 * event buffer; the struct bodies are defined further down this file.
 */
typedef struct new_process_t new_process_t;
typedef struct new_map_t new_map_t;
typedef struct sample_t sample_t;
typedef struct fork_t fork_t;
typedef struct exit_t exit_t;
/* Recording-side state: a growable byte buffer to which event records
 * are appended back to back, plus the stash that holds the raw stack
 * traces referenced by sample events.
 */
struct tracker_t
{
StackStash *stash; /* receives the traces passed to tracker_add_sample() */
size_t n_event_bytes; /* number of bytes of 'events' currently used */
size_t n_allocated_bytes; /* allocated size of 'events', in bytes */
uint8_t * events; /* event records, copied in by tracker_append() */
};
/* Kind of an event record. The value is packed into the bits at and
 * above TYPE_SHIFT of the record's 32-bit header (see MAKE_HEADER and
 * GET_TYPE), so the numeric values must fit in those bits.
 */
typedef enum
{
NEW_PROCESS, /* record is a new_process_t */
NEW_MAP, /* record is a new_map_t */
SAMPLE, /* record is a sample_t */
FORK, /* record is a fork_t */
EXIT /* record is an exit_t */
} event_type_t;
/* NEW_PROCESS event: gives the command line for the pid in the header. */
struct new_process_t
{
uint32_t header; /* MAKE_HEADER (NEW_PROCESS, pid) */
char command_line[256]; /* NUL-terminated; longer input is truncated by COPY_STRING */
};
/* FORK event: the pid in the header is the parent, 'child_pid' the
 * newly created process.
 */
struct fork_t
{
uint32_t header; /* MAKE_HEADER (FORK, parent pid) */
int32_t child_pid; /* equal to the header pid when only a thread was spawned */
};
/* EXIT event: carries nothing beyond the pid encoded in the header. */
struct exit_t
{
uint32_t header; /* MAKE_HEADER (EXIT, pid) */
};
/* NEW_MAP event: one memory mapping of the process named in the header. */
struct new_map_t
{
uint32_t header; /* MAKE_HEADER (NEW_MAP, pid) */
char filename[PATH_MAX]; /* NUL-terminated; longer input is truncated by COPY_STRING */
uint64_t start; /* first address covered by the mapping */
uint64_t end; /* one past the last covered address (lookups test addr < end) */
uint64_t offset; /* added to (address - start) before the symbol lookup */
uint64_t inode; /* compared against the file via bin_file_check_inode(); 0 skips the check */
};
/* SAMPLE event: one recorded stack trace for the pid in the header. */
struct sample_t
{
uint32_t header; /* MAKE_HEADER (SAMPLE, pid) */
StackNode * trace; /* node returned by stack_stash_add_trace(); walked via ->parent */
};
/* Every event record starts with a 32-bit header. The event type is
 * stored in the bits at and above TYPE_SHIFT, the pid in the bits
 * below it.
 */
#define TYPE_SHIFT 29
#define PID_MASK ((uint32_t)(((uint32_t)1 << TYPE_SHIFT) - 1))

/* Build a header from an event type and a pid.
 *
 * Both arguments are parenthesised and converted to uint32_t before
 * any arithmetic: shifting the type as a plain int would overflow for
 * type values >= 4 (undefined behaviour), and unparenthesised
 * arguments would mis-parse when the caller passes an expression.
 */
#define MAKE_HEADER(type, pid)                                          \
    ((uint32_t)((((uint32_t)(pid)) & PID_MASK) |                        \
                (((uint32_t)(type)) << TYPE_SHIFT)))

/* Extract the pid stored in a header. */
#define GET_PID(header)                                                 \
    (((uint32_t)(header)) & PID_MASK)

/* Extract the event type stored in a header. */
#define GET_TYPE(header)                                                \
    (((uint32_t)(header)) >> TYPE_SHIFT)

/* Initial size, in bytes, of a tracker's event buffer. */
#define DEFAULT_SIZE (1024 * 1024 * 4)
/* Read the file whose name is 'format' expanded with 'pid' and split
 * its contents at every newline.
 *
 * Returns the vector produced by g_strsplit() (callers release it with
 * g_strfreev()), or NULL when the file could not be read.
 */
static char **
get_lines (const char *format, pid_t pid)
{
    char **lines = NULL;
    char *text = NULL;
    char *path;

    path = g_strdup_printf (format, pid);

    if (g_file_get_contents (path, &text, NULL, NULL))
    {
        lines = g_strsplit (text, "\n", -1);
        g_free (text);
    }

    g_free (path);

    return lines;
}
/* Synthesize a NEW_PROCESS event for an already-running process.
 *
 * The first line of /proc/<pid>/cmdline is used when it is non-empty;
 * otherwise the "Name:" entry of /proc/<pid>/status is used. If
 * neither yields a name, a diagnostic is printed and no event is added.
 */
static void
fake_new_process (tracker_t *tracker, pid_t pid)
{
    gboolean added = FALSE;
    char **lines;

    lines = get_lines ("/proc/%d/cmdline", pid);
    if (lines != NULL)
    {
        if (lines[0] != NULL && lines[0][0] != '\0')
        {
            tracker_add_process (tracker, pid, lines[0]);
            added = TRUE;
        }
        g_strfreev (lines);
    }

    if (!added)
    {
        lines = get_lines ("/proc/%d/status", pid);
        if (lines != NULL)
        {
            char **line;

            /* Stop at the first "Name:" line that has a non-empty value */
            for (line = lines; *line != NULL && !added; ++line)
            {
                char *name;

                if (strncmp ("Name:", *line, 5) != 0)
                    continue;

                name = g_strstrip (strchr (*line, ':') + 1);
                if (name[0] != '\0')
                {
                    tracker_add_process (tracker, pid, name);
                    added = TRUE;
                }
            }
            g_strfreev (lines);
        }
    }

    if (!added)
        g_print ("failed to fake %d\n", pid);
}
/* Synthesize NEW_MAP events for an already-running process by parsing
 * /proc/<pid>/maps. Only lines where all five fields (start, end,
 * offset, inode, file name) parse are turned into events.
 */
static void
fake_new_map (tracker_t *tracker, pid_t pid)
{
    char **lines = get_lines ("/proc/%d/maps", pid);
    char **line;

    if (lines == NULL)
        return;

    for (line = lines; *line != NULL; ++line)
    {
        char path[256];
        gulong first;
        gulong last;
        gulong file_offset;
        gulong ino;
        int n_fields;

        path[255] = '\0';

        n_fields = sscanf (
            *line, "%lx-%lx %*15s %lx %*x:%*x %lu %255s",
            &first, &last, &file_offset, &ino, path);

        if (n_fields != 5)
            continue;

        if (strcmp (path, "[vdso]") == 0)
        {
            /* For the vdso, the kernel reports 'offset' as the same
             * value as the mapping address, which is not useful, so
             * it is zeroed here. binfile.c (read_inode) returns 0 for
             * [vdso], hence the inode is zeroed as well.
             */
            file_offset = 0;
            ino = 0;
        }

        tracker_add_map (tracker, pid, first, last, file_offset, ino, path);
    }

    g_strfreev (lines);
}
/* Walk /proc and fake process and map events for every entry whose
 * name is entirely numeric. Does nothing if /proc cannot be opened.
 */
static void
populate_from_proc (tracker_t *tracker)
{
    const char *entry;
    GDir *dir;

    dir = g_dir_open ("/proc", 0, NULL);
    if (dir == NULL)
        return;

    for (entry = g_dir_read_name (dir);
         entry != NULL;
         entry = g_dir_read_name (dir))
    {
        char *rest;
        pid_t pid = strtol (entry, &rest, 10);

        /* Entries with trailing non-digits are not pids */
        if (*rest != '\0')
            continue;

        fake_new_process (tracker, pid);
        fake_new_map (tracker, pid);
    }

    g_dir_close (dir);
}
/* Convert a GTimeVal to milliseconds.
 *
 * The seconds field is converted to double before it is scaled to
 * microseconds: GLib declares tv_sec as glong, and multiplying a
 * present-day timestamp by G_USEC_PER_SEC in integer arithmetic
 * overflows wherever glong is 32 bits wide.
 */
static double
timeval_to_ms (const GTimeVal *timeval)
{
    double usec = (double) timeval->tv_sec * G_USEC_PER_SEC + timeval->tv_usec;

    return usec / 1000.0;
}
/* Return 'first' minus 'second', in milliseconds. */
static double
time_diff (const GTimeVal *first,
           const GTimeVal *second)
{
    return timeval_to_ms (first) - timeval_to_ms (second);
}
tracker_t *
tracker_new (void)
{
tracker_t *tracker = g_new0 (tracker_t, 1);
GTimeVal before, after;
tracker->n_event_bytes = 0;
tracker->n_allocated_bytes = DEFAULT_SIZE;
tracker->events = g_malloc (DEFAULT_SIZE);
tracker->stash = stack_stash_new (NULL);
g_get_current_time (&before);
populate_from_proc (tracker);
g_get_current_time (&after);
#if 0
g_print ("Time to populate %f\n", time_diff (&after, &before));
#endif
return tracker;
}
/* Drop the tracker's reference on its stash and free the event buffer
 * and the tracker itself.
 */
void
tracker_free (tracker_t *tracker)
{
    StackStash *stash = tracker->stash;
    uint8_t *events = tracker->events;

    stack_stash_unref (stash);
    g_free (events);
    g_free (tracker);
}
/* Copy 'src' into the char ARRAY 'dest' (sizeof (dest) must be the
 * array size, so a pointer will not do). Overlong input is truncated,
 * unused trailing bytes are zero-filled by strncpy(), and the result
 * is always NUL-terminated.
 */
#define COPY_STRING(dest, src)                                          \
    do                                                                  \
    {                                                                   \
        const size_t copy_string_last_ = sizeof (dest) - 1;             \
                                                                        \
        strncpy ((dest), (src), copy_string_last_);                     \
        (dest)[copy_string_last_] = '\0';                               \
    }                                                                   \
    while (0)
/* Append the 'n_bytes' bytes at 'event' to the tracker's event buffer.
 *
 * The buffer is doubled until the record fits, so a record larger than
 * the current capacity is handled instead of relying on one doubling
 * always being enough. A negative 'n_bytes' is a programming error and
 * trips the assertion.
 */
static void
tracker_append (tracker_t *tracker,
                void      *event,
                int        n_bytes)
{
    size_t needed;

    g_assert (n_bytes >= 0);
    needed = (size_t) n_bytes;

    if (tracker->n_allocated_bytes - tracker->n_event_bytes < needed)
    {
        size_t new_size = tracker->n_allocated_bytes;

        /* Grow geometrically until the free space covers the record */
        while (new_size - tracker->n_event_bytes < needed)
            new_size = new_size > 0 ? new_size * 2 : needed;

        tracker->events = g_realloc (tracker->events, new_size);
        tracker->n_allocated_bytes = new_size;
    }

    memcpy (tracker->events + tracker->n_event_bytes, event, needed);

#if 0
    g_print (" (address %p)\n", tracker->events + tracker->n_event_bytes);
#endif

    tracker->n_event_bytes += needed;
}
/* Record a NEW_PROCESS event for 'pid'. 'command_line' is copied into
 * the fixed-size field of the record and truncated if it is too long.
 */
void
tracker_add_process (tracker_t  *tracker,
                     pid_t       pid,
                     const char *command_line)
{
    new_process_t record;

#if 0
    g_print ("Add new process %s %d to tracker ", command_line, pid);
#endif

    COPY_STRING (record.command_line, command_line);
    record.header = MAKE_HEADER (NEW_PROCESS, pid);

    tracker_append (tracker, &record, sizeof (record));
}
/* Record a FORK event: 'pid' is the parent and goes into the header,
 * 'child_pid' is stored in the record body.
 */
void
tracker_add_fork (tracker_t *tracker,
                  pid_t      pid,
                  pid_t      child_pid)
{
    fork_t record;

    record.child_pid = child_pid;
    record.header = MAKE_HEADER(FORK, pid);

    tracker_append (tracker, &record, sizeof (record));
}
/* Record an EXIT event for 'pid'; the record consists of the header only. */
void
tracker_add_exit (tracker_t *tracker,
                  pid_t      pid)
{
    exit_t record;

    record.header = MAKE_HEADER (EXIT, pid);

    tracker_append (tracker, &record, sizeof (record));
}
/* Record a NEW_MAP event describing the mapping [start, end) of
 * 'filename' in process 'pid'. 'filename' is copied into the record
 * and truncated if it does not fit.
 */
void
tracker_add_map (tracker_t  *tracker,
                 pid_t       pid,
                 uint64_t    start,
                 uint64_t    end,
                 uint64_t    offset,
                 uint64_t    inode,
                 const char *filename)
{
    new_map_t record;

    record.header = MAKE_HEADER (NEW_MAP, pid);

    record.start = start;
    record.end = end;
    record.offset = offset;
    record.inode = inode;
    COPY_STRING (record.filename, filename);

    tracker_append (tracker, &record, sizeof (record));
}
/* Record a SAMPLE event for 'pid'. The 'n_ips' addresses in 'ips' are
 * added to the tracker's stash and the node that call returns is what
 * the event stores.
 */
void
tracker_add_sample (tracker_t *tracker,
                    pid_t      pid,
                    uint64_t  *ips,
                    int        n_ips)
{
    StackNode *trace = stack_stash_add_trace (tracker->stash, ips, n_ips, 1);
    sample_t record;

    record.header = MAKE_HEADER (SAMPLE, pid);
    record.trace = trace;

    tracker_append (tracker, &record, sizeof (record));
}
/* */
/* Types used while replaying the recorded events into a profile. */
typedef struct state_t state_t;
typedef struct process_t process_t;
typedef struct map_t map_t;
/* Replay-time view of one process. */
struct process_t
{
pid_t pid; /* key under which the process is stored in processes_by_pid */
char * comm; /* owned copy of the command line / name; freed in destroy_process() */
GArray * maps; /* array of map_t; each element owns its filename */
};
/* Replay-time copy of one NEW_MAP event (see new_map_t). */
struct map_t
{
char * filename; /* owned, g_strdup()'ed copy */
uint64_t start; /* first address covered */
uint64_t end; /* one past the last covered address */
uint64_t offset; /* added to (address - start) before the symbol lookup */
uint64_t inode; /* 0 disables the inode check in lookup_symbol() */
};
/* All bookkeeping needed while replaying events in tracker_create_profile(). */
struct state_t
{
GHashTable *processes_by_pid; /* pid -> process_t; values are freed with free_process() */
GHashTable *unique_comms; /* interned message strings, keyed by string contents (see make_message) */
GHashTable *unique_symbols; /* symbol pointer -> demangled name, keyed by address (see unique_dup) */
GHashTable *bin_files; /* file name -> bin_file_t; the table owns both key and value */
};
/* Find the map of 'process' that contains 'addr'.
 *
 * A hit is moved to the front of the array (the entries before it
 * shift back by one slot) so that repeated lookups of the same map are
 * found immediately. Returns a pointer to the first array element on
 * success, NULL when no map covers 'addr'.
 */
static const map_t *
process_locate_map (process_t *process, gulong addr)
{
    GArray *maps = process->maps;
    unsigned int hit;

    /* Linear search for the first map covering the address */
    for (hit = 0; hit < maps->len; ++hit)
    {
        const map_t *candidate = &g_array_index (maps, map_t, hit);

        if (addr >= candidate->start && addr < candidate->end)
            break;
    }

    if (hit == maps->len)
        return NULL;

    if (hit != 0)
    {
        map_t found = g_array_index (maps, map_t, hit);

        /* Slide entries [0, hit) up by one, then put the hit in front */
        memmove (&(g_array_index (maps, map_t, 1)),
                 &(g_array_index (maps, map_t, 0)),
                 hit * sizeof (map_t));

        g_array_index (maps, map_t, 0) = found;
    }

    return &g_array_index (maps, map_t, 0);
}
/* Replay a NEW_MAP event: add the mapping to its process, first
 * dropping every existing map of that process that overlaps it.
 *
 * Events for a pid that has no process entry are ignored.
 */
static void
create_map (state_t *state, new_map_t *new_map)
{
    process_t *process;
    map_t map;
    unsigned int i;
    pid_t pid = GET_PID (new_map->header);

    process = g_hash_table_lookup (
        state->processes_by_pid, GINT_TO_POINTER (pid));

    if (!process)
        return;

    map.filename = g_strdup (new_map->filename);
    map.start = new_map->start;
    map.end = new_map->end;
    map.offset = new_map->offset;
    map.inode = new_map->inode;

    /* Remove existing maps that overlap the new one.
     *
     * g_array_remove_index() moves the following elements down by one,
     * so after a removal slot 'i' already holds the next candidate and
     * the index must not advance; otherwise the element following
     * every removed map would be skipped.
     */
    i = 0;
    while (i < process->maps->len)
    {
        map_t *m = &g_array_index (process->maps, map_t, i);

        if (m->start < map.end && m->end > map.start)
        {
            g_free (m->filename);
            g_array_remove_index (process->maps, i);
        }
        else
        {
            ++i;
        }
    }

    g_array_append_vals (process->maps, &map, 1);
}
/* Free a process_t together with its command string, the file name of
 * each of its maps, and the map array itself.
 */
static void
destroy_process (process_t *process)
{
    GArray *maps = process->maps;
    unsigned int n;

    for (n = 0; n != maps->len; ++n)
        g_free (g_array_index (maps, map_t, n).filename);

    g_array_free (maps, TRUE);

    g_free (process->comm);
    g_free (process);
}
/* Replay a NEW_PROCESS event. A pid that is already known only gets
 * its command string replaced (its maps are kept); an unknown pid gets
 * a new entry with an empty map array.
 */
static void
create_process (state_t *state, new_process_t *new_process)
{
    pid_t pid = GET_PID (new_process->header);
    process_t *known;

    known = g_hash_table_lookup (
        state->processes_by_pid, GINT_TO_POINTER (pid));

    if (known == NULL)
    {
        process_t *fresh = g_new0 (process_t, 1);

        fresh->pid = pid;
        fresh->comm = g_strdup (new_process->command_line);
        fresh->maps = g_array_new (FALSE, FALSE, sizeof (map_t));

        g_hash_table_insert (
            state->processes_by_pid, GINT_TO_POINTER (fresh->pid), fresh);
        return;
    }

    g_free (known->comm);
    known->comm = g_strdup (new_process->command_line);
}
/* Replay a FORK event: register the child process.
 *
 * When the parent is known, the child starts with a copy of its
 * command string and of all its maps; otherwise it is named
 * "[pid N]" and has no maps. An event whose child pid equals the
 * parent pid denotes a new thread and is ignored.
 */
static void
process_fork (state_t *state, fork_t *fork)
{
    pid_t ppid = GET_PID (fork->header);
    process_t *parent;
    process_t *child;

    if (ppid == fork->child_pid)
    {
        /* Just a new thread being spawned */
        return;
    }

    parent = g_hash_table_lookup (
        state->processes_by_pid, GINT_TO_POINTER (ppid));

    child = g_new0 (process_t, 1);
    child->pid = fork->child_pid;
    child->maps = g_array_new (FALSE, FALSE, sizeof (map_t));

    if (parent == NULL)
    {
        child->comm = g_strdup_printf ("[pid %d]", fork->child_pid);
    }
    else
    {
        unsigned int n;

        child->comm = g_strdup (parent->comm);

        /* Each copied map needs a file name string of its own */
        for (n = 0; n < parent->maps->len; ++n)
        {
            map_t dup = g_array_index (parent->maps, map_t, n);

            dup.filename = g_strdup (dup.filename);
            g_array_append_val (child->maps, dup);
        }
    }

    g_hash_table_insert (
        state->processes_by_pid, GINT_TO_POINTER (child->pid), child);
}
/* Replay an EXIT event. Exits are currently ignored, so the process
 * entry is left in place.
 */
static void
process_exit (state_t *state, exit_t *exit)
{
    (void) state;
    (void) exit;

#if 0
    g_print ("Exit for %d\n", exit->pid);
#endif

    /* ignore for now */
}
/* Value-destroy callback for the processes_by_pid table: forwards the
 * untyped pointer to destroy_process().
 */
static void
free_process (gpointer data)
{
    destroy_process ((process_t *) data);
}
static state_t *
state_new (void)
{
state_t *state = g_new0 (state_t, 1);
state->processes_by_pid =
g_hash_table_new_full (g_direct_hash, g_direct_equal,
NULL, free_process);
state->unique_symbols = g_hash_table_new (g_direct_hash, g_direct_equal);
state->unique_comms = g_hash_table_new (g_str_hash, g_str_equal);
state->bin_files = g_hash_table_new_full (g_str_hash, g_str_equal,
g_free,
(GDestroyNotify)bin_file_free);
return state;
}
/* Destroy the four hash tables of a replay state, then the state itself. */
static void
state_free (state_t *state)
{
    GHashTable *tables[4];
    unsigned int n;

    tables[0] = state->processes_by_pid;
    tables[1] = state->unique_symbols;
    tables[2] = state->unique_comms;
    tables[3] = state->bin_files;

    for (n = 0; n < sizeof (tables) / sizeof (tables[0]); ++n)
        g_hash_table_destroy (tables[n]);

    g_free (state);
}
/* One text symbol parsed from /proc/kallsyms. */
typedef struct
{
gulong address; /* parsed from the hexadecimal first column */
char *name; /* g_strdup()'ed copy of the third column */
} kernel_symbol_t;
/* Parse one "<hex address> <type> <name> ..." line of /proc/kallsyms
 * and append it to 'table' if it describes a text symbol (type 'T'
 * or 't'). Lines that do not match are ignored.
 *
 * The address is kept unsigned end to end (strtoul() returns an
 * unsigned long and kernel_symbol_t stores a gulong), and a line whose
 * address column is empty is rejected rather than read as address 0.
 */
static void
parse_kallsym_line (const char *line, GArray *table)
{
    char **tokens = g_strsplit_set (line, " \t", -1);

    if (tokens[0] && tokens[1] && tokens[2])
    {
        unsigned long address;
        char *endptr;

        address = strtoul (tokens[0], &endptr, 16);

        if (endptr != tokens[0] &&
            *endptr == '\0' &&
            (strcmp (tokens[1], "T") == 0 ||
             strcmp (tokens[1], "t") == 0))
        {
            kernel_symbol_t sym;

            sym.address = address;
            sym.name = g_strdup (tokens[2]);

            g_array_append_val (table, sym);
        }
    }

    g_strfreev (tokens);
}
/* Feed every newline-terminated line of 'kallsyms' to
 * parse_kallsym_line(). Text after the last newline is not parsed.
 *
 * Returns TRUE if 'table' ends up with more than one entry.
 */
static gboolean
parse_kallsyms (const char *kallsyms,
                GArray     *table)
{
    const char *line_start = kallsyms;
    const char *newline;

    while ((newline = strchr (line_start, '\n')) != NULL)
    {
        char *line = g_strndup (line_start, newline - line_start);

        parse_kallsym_line (line, table);
        g_free (line);

        line_start = newline + 1;
    }

    return table->len > 1;
}
/* Comparison callback that orders kernel symbols by ascending address:
 * returns -1, 0 or 1.
 */
static int
compare_syms (gconstpointer a, gconstpointer b)
{
    const kernel_symbol_t *lhs = a;
    const kernel_symbol_t *rhs = b;

    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs->address > rhs->address) - (lhs->address < rhs->address);
}
/* Search symbols[first..last] (inclusive, sorted by ascending address)
 * for the last symbol whose address is <= 'address'.
 *
 * The range is halved until fewer than three slots separate its ends,
 * then the remainder is scanned from the top. Returns NULL when every
 * symbol in the range lies above 'address'.
 */
static kernel_symbol_t *
do_lookup (kernel_symbol_t *symbols,
           gulong           address,
           int              first,
           int              last)
{
    for (;;)
    {
        int mid;

        if (address >= symbols[last].address)
            return &(symbols[last]);

        if (last - first < 3)
            break;

        mid = (first + last) / 2;

        if (symbols[mid].address > address)
            last = mid;
        else
            first = mid;
    }

    /* Small range left: walk down from the top */
    for (; last >= first; --last)
    {
        if (address >= symbols[last].address)
            return &(symbols[last]);
    }

    return NULL;
}
/* Return the table of kernel text symbols sorted by address, loading
 * it from /proc/kallsyms on the first call and caching the outcome.
 *
 * Returns NULL (after printing a warning on the first call) when the
 * file cannot be read or yields at most one symbol.
 */
static GArray *
get_kernel_symbols (void)
{
    static GArray *kernel_syms;
    static gboolean initialized = FALSE;
    char *kallsyms;

#if 0
    find_kernel_binary();
#endif

    if (initialized)
        return kernel_syms;

    if (g_file_get_contents ("/proc/kallsyms", &kallsyms, NULL, NULL))
    {
        if (kallsyms != NULL)
        {
            GArray *table = g_array_new (TRUE, TRUE, sizeof (kernel_symbol_t));

            if (parse_kallsyms (kallsyms, table))
            {
                g_array_sort (table, compare_syms);
                kernel_syms = table;
            }
            else
            {
                g_array_free (table, TRUE);
                kernel_syms = NULL;
            }
        }

        g_free (kallsyms);
    }

    if (kernel_syms == NULL)
    {
        g_print ("Warning: /proc/kallsyms could not be "
                 "read. Kernel symbols will not be available\n");
    }

    initialized = TRUE;

    return kernel_syms;
}
/* Names of kernel symbols that lookup_kernel_symbol() suppresses
 * (it returns NULL for them). The list ends with an empty string,
 * which is what the lookup loop uses as its terminator. Some names
 * appear in both groups.
 */
static const char skip_kernel_symbols[][32] =
{
/* IRQ stack */
"common_interrupt",
"apic_timer_interrupt",
"smp_apic_timer_interrupt",
"hrtimer_interrupt",
"__run_hrtimer",
"perf_swevent_hrtimer",
"perf_event_overflow",
"__perf_event_overflow",
"perf_prepare_sample",
"perf_callchain",
"perf_swcounter_hrtimer",
"perf_counter_overflow",
"__perf_counter_overflow",
"perf_counter_output",
/* NMI stack */
"nmi_stack_correct",
"do_nmi",
"notify_die",
"atomic_notifier_call_chain",
"notifier_call_chain",
"perf_event_nmi_handler",
"perf_counter_nmi_handler",
"intel_pmu_handle_irq",
"perf_event_overflow",
"perf_counter_overflow",
"__perf_event_overflow",
"perf_prepare_sample",
"perf_callchain",
""
};
/* Return the name of the kernel symbol covering 'address'.
 *
 * Returns NULL when the kernel symbol table is unavailable or empty,
 * when no symbol lies at or below 'address', or when the symbol is
 * listed in skip_kernel_symbols. The returned string belongs to the
 * cached symbol table and must not be freed.
 */
const char *
lookup_kernel_symbol (gulong address)
{
    kernel_symbol_t *result;
    GArray *ksyms = get_kernel_symbols ();
    int i;

    /* get_kernel_symbols() returns NULL if /proc/kallsyms was unusable */
    if (ksyms == NULL || ksyms->len == 0)
        return NULL;

    result = do_lookup ((kernel_symbol_t *)ksyms->data,
                        address, 0, ksyms->len - 1);

    /* Nothing at or below the address: there is no name to filter */
    if (result == NULL)
        return NULL;

    /* This is a workaround for a kernel bug, where it reports not
     * only the kernel stack, but also the IRQ stack for the
     * timer interrupt that generated the stack.
     *
     * The stack as reported by the kernel looks like this:
     *
     * [ip] [irq stack] [real kernel stack]
     *
     * Below we filter out the [irq stack]
     */
    for (i = 0; skip_kernel_symbols[i][0] != '\0'; i++)
    {
        if (strcmp (result->name, skip_kernel_symbols[i]) == 0)
            return NULL;
    }

    return result->name;
}
/* Return the demangled form of 'sym', computing it with elf_demangle()
 * on first use and caching it afterwards.
 *
 * Note that 'unique_symbols' is a direct_hash table, i.e. the cache is
 * keyed by the ADDRESS of the symbol string: we rely on different
 * symbols having strings at different addresses.
 */
static char *
unique_dup (GHashTable *unique_symbols, const char *sym)
{
    char *demangled = g_hash_table_lookup (unique_symbols, sym);

    if (demangled != NULL)
        return demangled;

    demangled = elf_demangle (sym);
    g_hash_table_insert (unique_symbols, (char *)sym, demangled);

    return demangled;
}
/* Format a message printf-style and intern it in state->unique_comms.
 *
 * Equal messages always yield the same pointer: when the text is
 * already in the table the freshly formatted copy is freed and the
 * stored one returned; otherwise the new string is stored as both key
 * and value.
 */
static const char *
make_message (state_t *state, const char *format, ...)
{
    va_list ap;
    char *formatted;
    char *interned;

    va_start (ap, format);
    g_vasprintf (&formatted, format, ap);
    va_end (ap);

    interned = g_hash_table_lookup (state->unique_comms, formatted);
    if (interned == NULL)
    {
        g_hash_table_insert (state->unique_comms, formatted, formatted);
        return formatted;
    }

    g_free (formatted);

    return interned;
}
/* Return the bin_file_t cached for 'filename', creating it with
 * bin_file_new() and caching it under a copy of the name on first use.
 */
static bin_file_t *
state_get_bin_file (state_t *state, const char *filename)
{
    bin_file_t *cached = g_hash_table_lookup (state->bin_files, filename);

    if (cached != NULL)
        return cached;

    cached = bin_file_new (filename);
    g_hash_table_insert (state->bin_files, g_strdup (filename), cached);

    return cached;
}
/* Resolve 'address' to a unique, demangled symbol name.
 *
 * With 'kernel' set the address is looked up in the kernel symbol
 * table. Otherwise it is located in one of the process's maps,
 * translated to a file address (address - start + offset) and looked
 * up in that binary. A missing map or an inode mismatch produces an
 * explanatory message in place of a symbol name.
 *
 * Returns NULL when no name could be determined.
 */
static char *
lookup_symbol (state_t   *state,
               process_t *process,
               uint64_t   address,
               gboolean   kernel)
{
    const char *name;

    g_assert (process);

    if (kernel)
    {
        name = lookup_kernel_symbol (address);
    }
    else
    {
        const map_t *map = process_locate_map (process, address);

        if (map == NULL)
        {
            name = make_message (state, "No map [%s]", process->comm);
        }
        else
        {
            bin_file_t *bin_file = state_get_bin_file (state, map->filename);
            uint64_t file_address = address - map->start + map->offset;

            if (map->inode != 0 && !bin_file_check_inode (bin_file, map->inode))
            {
                /* If the inodes don't match, it's probably because the
                 * file has changed since the process was started.
                 */
                name = make_message (state, "%s: inode mismatch", map->filename);
            }
            else
            {
                const bin_symbol_t *bin_sym =
                    bin_file_lookup_symbol (bin_file, file_address);

                name = bin_symbol_get_name (bin_file, bin_sym);
            }
        }
    }

    return name != NULL ? unique_dup (state->unique_symbols, name) : NULL;
}
/* Associates a perf callchain context value with the label that
 * process_sample() inserts into resolved traces for it.
 */
typedef struct context_info_t context_info_t;
struct context_info_t
{
enum perf_callchain_context context; /* value compared against trace entries */
char name[32]; /* label emitted in the resolved trace */
};
/* Table searched by get_context_info(): one entry per recognised
 * callchain context value.
 */
static const context_info_t context_info[] =
{
{ PERF_CONTEXT_HV, "- - hypervisor - -" },
{ PERF_CONTEXT_KERNEL, "- - kernel - -" },
{ PERF_CONTEXT_USER, "- - user - - " },
{ PERF_CONTEXT_GUEST, "- - guest - - " },
{ PERF_CONTEXT_GUEST_KERNEL, "- - guest kernel - -" },
{ PERF_CONTEXT_GUEST_USER, "- - guest user - -" },
};
/* Label appended as the final entry of every resolved trace. */
static const char *const everything = "[Everything]";
/* Return the context_info entry whose context equals 'context', or
 * NULL if the value is not one of the listed contexts.
 */
static const context_info_t *
get_context_info (enum perf_callchain_context context)
{
    const context_info_t *info = context_info;
    const context_info_t *const stop =
        context_info + sizeof (context_info) / sizeof (context_info[0]);

    for (; info != stop; ++info)
    {
        if (info->context == context)
            return info;
    }

    return NULL;
}
/* Replay a SAMPLE event: turn the recorded trace into an array of
 * symbol-name pointers and add that array to 'resolved'.
 *
 * The recorded trace is walked from sample->trace through the
 * ->parent links. Entries that match a value in context_info are
 * context markers rather than addresses; every other entry is
 * resolved with lookup_symbol(). Samples whose pid has no process
 * entry are dropped.
 */
static void
process_sample (state_t *state, StackStash *resolved, sample_t *sample)
{
const context_info_t *context = NULL;
const char *cmdline;
uint64_t stack_addrs[256];
uint64_t *resolved_traces;
process_t *process;
StackNode *n;
int len;
pid_t pid = GET_PID (sample->header);
process = g_hash_table_lookup (
state->processes_by_pid, GINT_TO_POINTER (pid));
if (!process)
{
static gboolean warned;
if (!warned || pid != 0)
{
#if 0
g_print ("sample for unknown process %d\n", sample->pid);
#endif
warned = TRUE;
}
return;
}
/* Upper bound on the number of output entries: at most one per
 * trace node, plus slack for the entries appended after the loop
 * (at most three: context label, command line, "[Everything]").
 */
len = 5;
for (n = sample->trace; n != NULL; n = n->parent)
len++;
/* Use the on-stack buffer when it is large enough, the heap otherwise */
if (len > 256)
resolved_traces = g_new (uint64_t, len);
else
resolved_traces = stack_addrs;
/* From here on 'len' counts the entries actually written */
len = 0;
for (n = sample->trace; n != NULL; n = n->parent)
{
uint64_t address = n->data;
const context_info_t *new_context;
const char *symbol;
new_context = get_context_info (address);
if (new_context)
{
/* A context marker: emit the label of the context that was in
 * effect so far (if any), then switch to the new one.
 */
if (context)
symbol = unique_dup (state->unique_symbols, context->name);
else
symbol = NULL;
context = new_context;
}
else
{
/* An ordinary address: treated as a kernel address only while
 * the current context is PERF_CONTEXT_KERNEL.
 */
gboolean kernel = context && context->context == PERF_CONTEXT_KERNEL;
symbol = lookup_symbol (state, process, address, kernel);
}
/* Entries that produced no symbol are left out of the result */
if (symbol)
resolved_traces[len++] = POINTER_TO_U64 (symbol);
}
if (context && context->context != PERF_CONTEXT_USER)
{
/* Kernel threads do not have a user part, so we end up here
* without ever getting a user context. If this happens,
* add the '- - kernel - - ' name, so that kernel threads
* are properly blamed on the kernel
*/
resolved_traces[len++] =
POINTER_TO_U64 (unique_dup (state->unique_symbols, context->name));
}
/* Every trace ends with "[<command>]" followed by "[Everything]" */
cmdline = make_message (state, "[%s]", process->comm);
resolved_traces[len++] = POINTER_TO_U64 (cmdline);
resolved_traces[len++] = POINTER_TO_U64 (
unique_dup (state->unique_symbols, everything));
stack_stash_add_trace (resolved, resolved_traces, len, 1);
if (resolved_traces != stack_addrs)
g_free (resolved_traces);
}
/* Replay every event recorded in 'tracker' and build a profile from
 * the resolved samples.
 *
 * Events are stored back to back; the type tag in each record's
 * header selects both the handler and the record size used to step to
 * the next event. The temporary replay state and this function's
 * reference on the resolved stash are released before returning.
 */
Profile *
tracker_create_profile (tracker_t *tracker)
{
    uint8_t *end = tracker->events + tracker->n_event_bytes;
    StackStash *resolved_stash;
    Profile *profile;
    state_t *state;
    uint8_t *event;

    state = state_new ();
    resolved_stash = stack_stash_new (g_free);

    event = tracker->events;
    while (event < end)
    {
        event_type_t type = GET_TYPE (*(uint32_t *)event);

        switch (type)
        {
        case NEW_PROCESS:
            create_process (state, (new_process_t *)event);
            event += sizeof (new_process_t);
            break;

        case NEW_MAP:
            create_map (state, (new_map_t *)event);
            event += sizeof (new_map_t);
            break;

        case FORK:
            process_fork (state, (fork_t *)event);
            event += sizeof (fork_t);
            break;

        case EXIT:
            process_exit (state, (exit_t *)event);
            event += sizeof (exit_t);
            break;

        case SAMPLE:
            process_sample (state, resolved_stash, (sample_t *)event);
            event += sizeof (sample_t);
            break;

        default:
            /* Unknown type tag: the record size is not known, so the
             * cursor cannot be advanced. Stop replaying here instead
             * of looping forever on the same bytes.
             */
            event = end;
            break;
        }
    }

    profile = profile_new (resolved_stash);

    state_free (state);
    stack_stash_unref (resolved_stash);

    return profile;
}