memprof: add memory profiling using LD_PRELOAD

This brings over some of the techniques from the old memprof design.
Sysprof and memprof shared a lot of code, so it is pretty natural to
bring back the same callgraph view based on memory allocations.

This reuses the StackStash just like it did in memprof. While it
would be nice to reuse some existing tools out there, the fit of
memprof with sysprof is so naturally aligned, it's not really a
big deal to bring back the LD_PRELOAD. The value really comes
from seeing all this stuff together instead of multiple apps.

There are plenty of things we can implement on top of this that
we are not doing yet such as temporary allocations, cross-thread
frees, graphing the heap, and graphing differences between the
heap at to points in time. I'd like all of these things, given
enough time to make them useful.

This is still a bit slow though due to the global lock we take
to access the writer. To improve the speed here we need to get
rid of that lock and head towards a design that allows a thread
to request a new writer from Sysprof and save it in TLS (to be
destroyed when the thread exits).
This commit is contained in:
Christian Hergert
2020-01-30 18:24:04 -08:00
parent cae70498da
commit 33c81a3a9c
41 changed files with 6817 additions and 193 deletions

216
src/libsysprof/rax.h Normal file
View File

@ -0,0 +1,216 @@
/* Rax -- A radix tree implementation.
*
* Copyright (c) 2017-2018, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef RAX_H
#define RAX_H
#include <stdint.h>
/* Representation of a radix tree as implemented in this file, that contains
* the strings "foo", "foobar" and "footer" after the insertion of each
* word. When the node represents a key inside the radix tree, we write it
* between [], otherwise it is written between ().
*
* This is the vanilla representation:
*
* (f) ""
* \
* (o) "f"
* \
* (o) "fo"
* \
* [t b] "foo"
* / \
* "foot" (e) (a) "foob"
* / \
* "foote" (r) (r) "fooba"
* / \
* "footer" [] [] "foobar"
*
* However, this implementation implements a very common optimization where
* successive nodes having a single child are "compressed" into the node
* itself as a string of characters, each representing a next-level child,
* and only the link to the node representing the last character node is
* provided inside the representation. So the above representation is turend
* into:
*
* ["foo"] ""
* |
* [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob"
* / \
* "footer" [] [] "foobar"
*
* However this optimization makes the implementation a bit more complex.
* For instance if a key "first" is added in the above radix tree, a
* "node splitting" operation is needed, since the "foo" prefix is no longer
* composed of nodes having a single child one after the other. This is the
* above tree and the resulting node splitting after this event happens:
*
*
* (f) ""
* /
* (i o) "f"
* / \
* "firs" ("rst") (o) "fo"
* / \
* "first" [] [t b] "foo"
* / \
* "foot" ("er") ("ar") "foob"
* / \
* "footer" [] [] "foobar"
*
* Similarly after deletion, if a new chain of nodes having a single child
* is created (the chain must also not include nodes that represent keys),
* it must be compressed back into a single node.
*
*/
#define RAX_NODE_MAX_SIZE ((1<<29)-1)
typedef struct raxNode {
uint32_t iskey:1; /* Does this node contain a key? */
uint32_t isnull:1; /* Associated value is NULL (don't store it). */
uint32_t iscompr:1; /* Node is compressed. */
uint32_t size:29; /* Number of children, or compressed string len. */
/* Data layout is as follows:
*
* If node is not compressed we have 'size' bytes, one for each children
* character, and 'size' raxNode pointers, point to each child node.
* Note how the character is not stored in the children but in the
* edge of the parents:
*
* [header iscompr=0][abc][a-ptr][b-ptr][c-ptr](value-ptr?)
*
* if node is compressed (iscompr bit is 1) the node has 1 children.
* In that case the 'size' bytes of the string stored immediately at
* the start of the data section, represent a sequence of successive
* nodes linked one after the other, for which only the last one in
* the sequence is actually represented as a node, and pointed to by
* the current compressed node.
*
* [header iscompr=1][xyz][z-ptr](value-ptr?)
*
* Both compressed and not compressed nodes can represent a key
* with associated data in the radix tree at any level (not just terminal
* nodes).
*
* If the node has an associated key (iskey=1) and is not NULL
* (isnull=0), then after the raxNode pointers poiting to the
* children, an additional value pointer is present (as you can see
* in the representation above as "value-ptr" field).
*/
unsigned char data[];
} raxNode;
typedef struct rax {
raxNode *head;
uint64_t numele;
uint64_t numnodes;
} rax;
/* Stack data structure used by raxLowWalk() in order to, optionally, return
* a list of parent nodes to the caller. The nodes do not have a "parent"
* field for space concerns, so we use the auxiliary stack when needed. */
#define RAX_STACK_STATIC_ITEMS 32
typedef struct raxStack {
void **stack; /* Points to static_items or an heap allocated array. */
size_t items, maxitems; /* Number of items contained and total space. */
/* Up to RAXSTACK_STACK_ITEMS items we avoid to allocate on the heap
* and use this static array of pointers instead. */
void *static_items[RAX_STACK_STATIC_ITEMS];
int oom; /* True if pushing into this stack failed for OOM at some point. */
} raxStack;
/* Optional callback used for iterators and be notified on each rax node,
* including nodes not representing keys. If the callback returns true
* the callback changed the node pointer in the iterator structure, and the
* iterator implementation will have to replace the pointer in the radix tree
* internals. This allows the callback to reallocate the node to perform
* very special operations, normally not needed by normal applications.
*
* This callback is used to perform very low level analysis of the radix tree
* structure, scanning each possible node (but the root node), or in order to
* reallocate the nodes to reduce the allocation fragmentation (this is the
* Redis application for this callback).
*
* This is currently only supported in forward iterations (raxNext) */
typedef int (*raxNodeCallback)(raxNode **noderef);
/* Radix tree iterator state is encapsulated into this data structure. */
#define RAX_ITER_STATIC_LEN 128
#define RAX_ITER_JUST_SEEKED (1<<0) /* Iterator was just seeked. Return current
element for the first iteration and
clear the flag. */
#define RAX_ITER_EOF (1<<1) /* End of iteration reached. */
#define RAX_ITER_SAFE (1<<2) /* Safe iterator, allows operations while
iterating. But it is slower. */
typedef struct raxIterator {
int flags;
rax *rt; /* Radix tree we are iterating. */
unsigned char *key; /* The current string. */
void *data; /* Data associated to this key. */
size_t key_len; /* Current key length. */
size_t key_max; /* Max key len the current key buffer can hold. */
unsigned char key_static_string[RAX_ITER_STATIC_LEN];
raxNode *node; /* Current node. Only for unsafe iteration. */
raxStack stack; /* Stack used for unsafe iteration. */
raxNodeCallback node_cb; /* Optional node callback. Normally set to NULL. */
} raxIterator;
/* A special pointer returned for not found items. */
extern void *raxNotFound;
/* Exported API. */
rax *raxNew(void);
int raxInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxTryInsert(rax *rax, unsigned char *s, size_t len, void *data, void **old);
int raxRemove(rax *rax, unsigned char *s, size_t len, void **old);
void *raxFind(rax *rax, unsigned char *s, size_t len);
void raxFree(rax *rax);
void raxFreeWithCallback(rax *rax, void (*free_callback)(void*));
void raxStart(raxIterator *it, rax *rt);
int raxSeek(raxIterator *it, const char *op, unsigned char *ele, size_t len);
int raxNext(raxIterator *it);
int raxPrev(raxIterator *it);
int raxRandomWalk(raxIterator *it, size_t steps);
int raxCompare(raxIterator *iter, const char *op, unsigned char *key, size_t key_len);
void raxStop(raxIterator *it);
int raxEOF(raxIterator *it);
void raxShow(rax *rax);
uint64_t raxSize(rax *rax);
unsigned long raxTouch(raxNode *n);
void raxSetDebugMsg(int onoff);
/* Internal API. May be used by the node callback in order to access rax nodes
* in a low level way, so this function is exported as well. */
void raxSetData(raxNode *n, void *data);
#endif