diff --git a/contrib/elfparser/elfparser.c b/contrib/elfparser/elfparser.c
index 2c6b638b..58f1cec3 100644
--- a/contrib/elfparser/elfparser.c
+++ b/contrib/elfparser/elfparser.c
@@ -27,6 +27,7 @@
 
 #include "demangle.h"
 #include "elfparser.h"
+#include "rust-demangle.h"
 
 typedef struct Section Section;
 
@@ -484,7 +485,15 @@ elf_parser_free (ElfParser *parser)
 gchar *
 elf_demangle (const char *name)
 {
-  gchar *demangled = sysprof_cplus_demangle (name);
+  /* Try demangling as rust symbol first as legacy rust symbols can demangle as C++ symbols too
+   * but will only get partially demangled in that case.
+   */
+  gchar *demangled = sysprof_rust_demangle (name, 0);
+
+  if (demangled)
+    return demangled;
+
+  demangled = sysprof_cplus_demangle (name);
 
   if (demangled)
     return demangled;
diff --git a/contrib/elfparser/meson.build b/contrib/elfparser/meson.build
index b3c75e6b..d1c7e68b 100644
--- a/contrib/elfparser/meson.build
+++ b/contrib/elfparser/meson.build
@@ -1,5 +1,6 @@
 libelfparser_sources = [
   'demangle.cpp',
+  'rust-demangle.c',
   'elfparser.c',
 ]
 
diff --git a/contrib/elfparser/rust-demangle.c b/contrib/elfparser/rust-demangle.c
new file mode 100644
index 00000000..78df8d92
--- /dev/null
+++ b/contrib/elfparser/rust-demangle.c
@@ -0,0 +1,1325 @@
+/*
+Imported from https://github.com/LykenSol/rust-demangle.c/pull/2 commit ea6fddfbf526700ee989336d9ff78797e38365eb
+Modifications from upstream:
+* Add sysprof_ prefix to exported symbols
+* Limit recursion depth so self-referential backrefs cannot overflow the stack
+* Reject out-of-range lifetimes, accept U+10FFFF, use snprintf
+*/
+
+// FIXME(eddyb) should this use `<rust-demangle.h>`?
+#include "rust-demangle.h"
+
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Upper bound on nested demangle_* calls. Backrefs can point at themselves
+// (or at an enclosing production), so without a cap a crafted symbol such
+// as `_RB_` recurses until the stack overflows.
+#define MAX_RECURSION_DEPTH 500
+
+struct rust_demangler {
+    const char *sym;
+    size_t sym_len;
+
+    void *callback_opaque;
+    void (*callback)(const char *data, size_t len, void *opaque);
+
+    // Position of the next character to read from the symbol.
+    size_t next;
+
+    // `true` if any error occurred.
+    bool errored;
+
+    // `true` if nothing should be printed.
+    bool skipping_printing;
+
+    // `true` if printing should be verbose (e.g. include hashes).
+    bool verbose;
+
+    // Rust mangling version, with legacy mangling being -1.
+    int version;
+
+    uint64_t bound_lifetime_depth;
+
+    // Number of demangle_* frames currently active.
+    unsigned recursion_depth;
+};
+
+#define ERROR_AND(x)                                                           \
+    do {                                                                       \
+        rdm->errored = true;                                                   \
+        x;                                                                     \
+    } while (0)
+#define CHECK_OR(cond, x)                                                      \
+    do {                                                                       \
+        if (!(cond))                                                           \
+            ERROR_AND(x);                                                      \
+    } while (0)
+
+// FIXME(eddyb) consider renaming these to not start with `IS` (UB?).
+#define IS_DIGIT(c) ((c) >= '0' && (c) <= '9')
+#define IS_UPPER(c) ((c) >= 'A' && (c) <= 'Z')
+#define IS_LOWER(c) ((c) >= 'a' && (c) <= 'z')
+
+// Parsing functions.
+
+// Return the next character without consuming it, or 0 at end of input.
+static char peek(const struct rust_demangler *rdm) {
+    if (rdm->next < rdm->sym_len)
+        return rdm->sym[rdm->next];
+    return 0;
+}
+
+// Consume the next character if it equals `c`; report whether it did.
+static bool eat(struct rust_demangler *rdm, char c) {
+    if (peek(rdm) == c) {
+        rdm->next++;
+        return true;
+    } else
+        return false;
+}
+
+// Consume and return the next character, flagging an error at end of input.
+static char next(struct rust_demangler *rdm) {
+    char c = peek(rdm);
+    CHECK_OR(c, return 0);
+    rdm->next++;
+    return c;
+}
+
+// Parse a base-62 number ending in `_`: a bare `_` is 0, else digits + 1.
+static uint64_t parse_integer_62(struct rust_demangler *rdm) {
+    if (eat(rdm, '_'))
+        return 0;
+
+    uint64_t x = 0;
+    while (!eat(rdm, '_')) {
+        char c = next(rdm);
+        x *= 62;
+        if (IS_DIGIT(c))
+            x += c - '0';
+        else if (IS_LOWER(c))
+            x += 10 + (c - 'a');
+        else if (IS_UPPER(c))
+            x += 10 + 26 + (c - 'A');
+        else
+            ERROR_AND(return 0);
+    }
+    return x + 1;
+}
+
+// Parse `tag` plus a base-62 number (as value + 1), or 0 if `tag` is absent.
+static uint64_t parse_opt_integer_62(struct rust_demangler *rdm, char tag) {
+    if (!eat(rdm, tag))
+        return 0;
+    return 1 + parse_integer_62(rdm);
+}
+
+static uint64_t parse_disambiguator(struct rust_demangler *rdm) {
+    return parse_opt_integer_62(rdm, 's');
+}
+
+struct rust_mangled_ident {
+    // ASCII part of the identifier.
+    const char *ascii;
+    size_t ascii_len;
+
+    // Punycode insertion codes for Unicode codepoints, if any.
+    const char *punycode;
+    size_t punycode_len;
+};
+
+// Parse a length-prefixed identifier, splitting off any punycode suffix.
+static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
+    struct rust_mangled_ident ident;
+
+    ident.ascii = NULL;
+    ident.ascii_len = 0;
+    ident.punycode = NULL;
+    ident.punycode_len = 0;
+
+    bool is_punycode = eat(rdm, 'u');
+
+    char c = next(rdm);
+    CHECK_OR(IS_DIGIT(c), return ident);
+    size_t len = c - '0';
+
+    if (c != '0')
+        while (IS_DIGIT(peek(rdm)))
+            len = len * 10 + (next(rdm) - '0');
+
+    // Skip past the optional `_` separator.
+    eat(rdm, '_');
+
+    size_t start = rdm->next;
+    rdm->next += len;
+    // Check for overflows.
+    CHECK_OR((start <= rdm->next) && (rdm->next <= rdm->sym_len), return ident);
+
+    ident.ascii = rdm->sym + start;
+    ident.ascii_len = len;
+
+    if (is_punycode) {
+        ident.punycode_len = 0;
+        while (ident.ascii_len > 0) {
+            ident.ascii_len--;
+
+            // The last '_' is a separator between ascii & punycode.
+            if (ident.ascii[ident.ascii_len] == '_')
+                break;
+
+            ident.punycode_len++;
+        }
+        CHECK_OR(ident.punycode_len > 0, return ident);
+        ident.punycode = ident.ascii + (len - ident.punycode_len);
+    }
+
+    if (ident.ascii_len == 0)
+        ident.ascii = NULL;
+
+    return ident;
+}
+
+// Printing functions.
+
+// Emit `len` bytes through the callback unless errored or skipping.
+static void
+print_str(struct rust_demangler *rdm, const char *data, size_t len) {
+    if (!rdm->errored && !rdm->skipping_printing)
+        rdm->callback(data, len, rdm->callback_opaque);
+}
+
+#define PRINT(s) print_str(rdm, s, strlen(s))
+
+static void print_uint64(struct rust_demangler *rdm, uint64_t x) {
+    char s[21];
+    snprintf(s, sizeof s, "%" PRIu64, x);
+    PRINT(s);
+}
+
+static void print_uint64_hex(struct rust_demangler *rdm, uint64_t x) {
+    char s[17];
+    snprintf(s, sizeof s, "%" PRIx64, x);
+    PRINT(s);
+}
+
+// Print `c` escaped for a literal delimited by `quote`.
+static void print_quoted_escaped_char(struct rust_demangler *rdm, char quote, uint32_t c) {
+    switch (c) {
+    case '\0':
+        PRINT("\\0");
+        break;
+
+    case '\t':
+        PRINT("\\t");
+        break;
+
+    case '\r':
+        PRINT("\\r");
+        break;
+
+    case '\n':
+        PRINT("\\n");
+        break;
+
+    case '\\':
+        PRINT("\\\\");
+        break;
+
+    case '"':
+        if (quote == '"') {
+            PRINT("\\\"");
+        } else {
+            PRINT("\"");
+        }
+        break;
+
+    case '\'':
+        if (quote == '\'') {
+            PRINT("\\'");
+        } else {
+            PRINT("'");
+        }
+        break;
+
+    default:
+        if (c >= 0x20 && c <= 0x7e) {
+            // Printable ASCII
+            char v = (char)c;
+            print_str(rdm, &v, 1);
+        } else {
+            // FIXME show printable unicode characters without hex encoding
+            PRINT("\\u{");
+            char s[9] = {0};
+            snprintf(s, sizeof s, "%" PRIx32, c);
+            PRINT(s);
+            PRINT("}");
+        }
+    }
+}
+
+// Print an identifier, decoding its punycode part (if any) to UTF-8.
+static void
+print_ident(struct rust_demangler *rdm, struct rust_mangled_ident ident) {
+    if (rdm->errored || rdm->skipping_printing)
+        return;
+
+    if (!ident.punycode) {
+        print_str(rdm, ident.ascii, ident.ascii_len);
+        return;
+    }
+
+    size_t len = 0;
+    size_t cap = 4;
+    while (cap < ident.ascii_len) {
+        cap *= 2;
+        // Check for overflows.
+        CHECK_OR((cap * 4) / 4 == cap, return );
+    }
+
+    // Store the output codepoints as groups of 4 UTF-8 bytes.
+    uint8_t *out = (uint8_t *)malloc(cap * 4);
+    CHECK_OR(out, return );
+
+    // Populate initial output from ASCII fragment.
+    for (len = 0; len < ident.ascii_len; len++) {
+        uint8_t *p = out + 4 * len;
+        p[0] = 0;
+        p[1] = 0;
+        p[2] = 0;
+        p[3] = ident.ascii[len];
+    }
+
+    // Punycode parameters and initial state.
+    size_t base = 36;
+    size_t t_min = 1;
+    size_t t_max = 26;
+    size_t skew = 38;
+    size_t damp = 700;
+    size_t bias = 72;
+    size_t i = 0;
+    uint32_t c = 0x80;
+
+    size_t punycode_pos = 0;
+    while (punycode_pos < ident.punycode_len) {
+        // Read one delta value.
+        size_t delta = 0;
+        size_t w = 1;
+        size_t k = 0;
+        size_t t;
+        uint8_t d;
+        do {
+            k += base;
+            t = k < bias ? 0 : (k - bias);
+            if (t < t_min)
+                t = t_min;
+            if (t > t_max)
+                t = t_max;
+
+            CHECK_OR(punycode_pos < ident.punycode_len, goto cleanup);
+            d = ident.punycode[punycode_pos++];
+
+            if (IS_LOWER(d))
+                d = d - 'a';
+            else if (IS_DIGIT(d))
+                d = 26 + (d - '0');
+            else
+                ERROR_AND(goto cleanup);
+
+            delta += d * w;
+            w *= base - t;
+        } while (d >= t);
+
+        // Compute the new insert position and character.
+        len++;
+        i += delta;
+        c += i / len;
+        i %= len;
+
+        // Ensure enough space is available.
+        if (cap < len) {
+            cap *= 2;
+            // Check for overflows.
+            CHECK_OR((cap * 4) / 4 == cap, goto cleanup);
+            CHECK_OR(cap >= len, goto cleanup);
+        }
+        uint8_t *p = (uint8_t *)realloc(out, cap * 4);
+        CHECK_OR(p, goto cleanup);
+        out = p;
+
+        // Move the characters after the insert position.
+        p = out + i * 4;
+        memmove(p + 4, p, (len - i - 1) * 4);
+
+        // Insert the new character, as UTF-8 bytes.
+        p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
+        p[1] =
+            c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
+        p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
+        p[3] = 0x80 | (c & 0x3f);
+
+        // If there are no more deltas, decoding is complete.
+        if (punycode_pos == ident.punycode_len)
+            break;
+
+        i++;
+
+        // Perform bias adaptation.
+        delta /= damp;
+        damp = 2;
+
+        delta += delta / len;
+        k = 0;
+        while (delta > ((base - t_min) * t_max) / 2) {
+            delta /= base - t_min;
+            k += base;
+        }
+        bias = k + ((base - t_min + 1) * delta) / (delta + skew);
+    }
+
+    // Remove all the 0 bytes to leave behind an UTF-8 string.
+    size_t j;
+    for (i = 0, j = 0; i < len * 4; i++)
+        if (out[i] != 0)
+            out[j++] = out[i];
+
+    print_str(rdm, (const char *)out, j);
+
+cleanup:
+    free(out);
+}
+
+/// Print the lifetime according to the previously decoded index.
+/// An index of `0` always refers to `'_`, but starting with `1`,
+/// indices refer to late-bound lifetimes introduced by a binder.
+static void print_lifetime_from_index(struct rust_demangler *rdm, uint64_t lt) {
+    PRINT("'");
+    if (lt == 0) {
+        PRINT("_");
+        return;
+    }
+
+    // A lifetime cannot refer to a binder that has not been entered.
+    CHECK_OR(lt <= rdm->bound_lifetime_depth, return );
+
+    uint64_t depth = rdm->bound_lifetime_depth - lt;
+    // Try to print lifetimes alphabetically first.
+    if (depth < 26) {
+        char c = 'a' + depth;
+        print_str(rdm, &c, 1);
+    } else {
+        // Use `'_123` after running out of letters.
+        PRINT("_");
+        print_uint64(rdm, depth);
+    }
+}
+
+// Demangling functions.
+
+static void demangle_binder(struct rust_demangler *rdm);
+static void demangle_path(struct rust_demangler *rdm, bool in_value);
+static void demangle_generic_arg(struct rust_demangler *rdm);
+static void demangle_type(struct rust_demangler *rdm);
+static bool demangle_path_maybe_open_generics(struct rust_demangler *rdm);
+static void demangle_dyn_trait(struct rust_demangler *rdm);
+static void demangle_const(struct rust_demangler *rdm, bool in_value);
+static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag);
+static void demangle_const_str_literal(struct rust_demangler *rdm);
+
+// Enter one level of demangle_* recursion. Returns false, flagging an error,
+// if a previous error occurred or MAX_RECURSION_DEPTH would be exceeded.
+// Error returns may skip the matching decrement: `errored` is sticky, so no
+// further recursion happens afterwards.
+static bool enter_recursion(struct rust_demangler *rdm) {
+    CHECK_OR(!rdm->errored, return false);
+    CHECK_OR(rdm->recursion_depth < MAX_RECURSION_DEPTH, return false);
+    rdm->recursion_depth++;
+    return true;
+}
+
+/// Optionally enter a binder ('G') for late-bound lifetimes,
+/// printing e.g. `for<'a, 'b> `, and make those lifetimes visible
+/// to the caller (via depth level, which the caller should reset).
+static void demangle_binder(struct rust_demangler *rdm) {
+    CHECK_OR(!rdm->errored, return );
+
+    uint64_t bound_lifetimes = parse_opt_integer_62(rdm, 'G');
+    if (bound_lifetimes > 0) {
+        PRINT("for<");
+        for (uint64_t i = 0; i < bound_lifetimes; i++) {
+            if (i > 0)
+                PRINT(", ");
+            rdm->bound_lifetime_depth++;
+            print_lifetime_from_index(rdm, 1);
+        }
+        PRINT("> ");
+    }
+}
+
+// Demangle a path; `in_value` makes generic args print as `::<...>`.
+static void demangle_path(struct rust_demangler *rdm, bool in_value) {
+    if (!enter_recursion(rdm))
+        return;
+
+    char tag = next(rdm);
+    switch (tag) {
+    case 'C': {
+        uint64_t dis = parse_disambiguator(rdm);
+        struct rust_mangled_ident name = parse_ident(rdm);
+
+        print_ident(rdm, name);
+        if (rdm->verbose) {
+            PRINT("[");
+            print_uint64_hex(rdm, dis);
+            PRINT("]");
+        }
+        break;
+    }
+    case 'N': {
+        char ns = next(rdm);
+        CHECK_OR(IS_LOWER(ns) || IS_UPPER(ns), return );
+
+        demangle_path(rdm, in_value);
+
+        uint64_t dis = parse_disambiguator(rdm);
+        struct rust_mangled_ident name = parse_ident(rdm);
+
+        if (IS_UPPER(ns)) {
+            // Special namespaces, like closures and shims.
+            PRINT("::{");
+            switch (ns) {
+            case 'C':
+                PRINT("closure");
+                break;
+            case 'S':
+                PRINT("shim");
+                break;
+            default:
+                print_str(rdm, &ns, 1);
+            }
+            if (name.ascii || name.punycode) {
+                PRINT(":");
+                print_ident(rdm, name);
+            }
+            PRINT("#");
+            print_uint64(rdm, dis);
+            PRINT("}");
+        } else {
+            // Implementation-specific/unspecified namespaces.
+
+            if (name.ascii || name.punycode) {
+                PRINT("::");
+                print_ident(rdm, name);
+            }
+        }
+        break;
+    }
+    case 'M':
+    case 'X':
+        // Ignore the `impl`'s own path.
+        parse_disambiguator(rdm);
+        bool was_skipping_printing = rdm->skipping_printing;
+        rdm->skipping_printing = true;
+        demangle_path(rdm, in_value);
+        rdm->skipping_printing = was_skipping_printing;
+        __attribute__((fallthrough));
+    case 'Y':
+        PRINT("<");
+        demangle_type(rdm);
+        if (tag != 'M') {
+            PRINT(" as ");
+            demangle_path(rdm, false);
+        }
+        PRINT(">");
+        break;
+    case 'I':
+        demangle_path(rdm, in_value);
+        if (in_value)
+            PRINT("::");
+        PRINT("<");
+        for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) {
+            if (i > 0)
+                PRINT(", ");
+            demangle_generic_arg(rdm);
+        }
+        PRINT(">");
+        break;
+    case 'B': {
+        size_t backref = parse_integer_62(rdm);
+        if (!rdm->skipping_printing) {
+            size_t old_next = rdm->next;
+            rdm->next = backref;
+            demangle_path(rdm, in_value);
+            rdm->next = old_next;
+        }
+        break;
+    }
+    default:
+        ERROR_AND(return );
+    }
+
+    rdm->recursion_depth--;
+}
+
+static void demangle_generic_arg(struct rust_demangler *rdm) {
+    if (eat(rdm, 'L')) {
+        uint64_t lt = parse_integer_62(rdm);
+        print_lifetime_from_index(rdm, lt);
+    } else if (eat(rdm, 'K'))
+        demangle_const(rdm, false);
+    else
+        demangle_type(rdm);
+}
+
+static const char *basic_type(char tag) {
+    switch (tag) {
+    case 'b':
+        return "bool";
+    case 'c':
+        return "char";
+    case 'e':
+        return "str";
+    case 'u':
+        return "()";
+    case 'a':
+        return "i8";
+    case 's':
+        return "i16";
+    case 'l':
+        return "i32";
+    case 'x':
+        return "i64";
+    case 'n':
+        return "i128";
+    case 'i':
+        return "isize";
+    case 'h':
+        return "u8";
+    case 't':
+        return "u16";
+    case 'm':
+        return "u32";
+    case 'y':
+        return "u64";
+    case 'o':
+        return "u128";
+    case 'j':
+        return "usize";
+    case 'f':
+        return "f32";
+    case 'd':
+        return "f64";
+    case 'z':
+        return "!";
+    case 'p':
+        return "_";
+    case 'v':
+        return "...";
+
+    default:
+        return NULL;
+    }
+}
+
+// Demangle a type, falling back to a path for unrecognised tags.
+static void demangle_type(struct rust_demangler *rdm) {
+    CHECK_OR(!rdm->errored, return );
+
+    char tag = next(rdm);
+
+    const char *basic = basic_type(tag);
+    if (basic) {
+        PRINT(basic);
+        return;
+    }
+
+    // Only compound types recurse, so only they count towards the limit.
+    if (!enter_recursion(rdm))
+        return;
+
+    switch (tag) {
+    case 'R':
+    case 'Q':
+        PRINT("&");
+        if (eat(rdm, 'L')) {
+            uint64_t lt = parse_integer_62(rdm);
+            if (lt) {
+                print_lifetime_from_index(rdm, lt);
+                PRINT(" ");
+            }
+        }
+        if (tag != 'R')
+            PRINT("mut ");
+        demangle_type(rdm);
+        break;
+    case 'P':
+    case 'O':
+        PRINT("*");
+        if (tag != 'P')
+            PRINT("mut ");
+        else
+            PRINT("const ");
+        demangle_type(rdm);
+        break;
+    case 'A':
+    case 'S':
+        PRINT("[");
+        demangle_type(rdm);
+        if (tag == 'A') {
+            PRINT("; ");
+            demangle_const(rdm, true);
+        }
+        PRINT("]");
+        break;
+    case 'T': {
+        PRINT("(");
+        size_t i;
+        for (i = 0; !rdm->errored && !eat(rdm, 'E'); i++) {
+            if (i > 0)
+                PRINT(", ");
+            demangle_type(rdm);
+        }
+        if (i == 1)
+            PRINT(",");
+        PRINT(")");
+        break;
+    }
+    case 'F': {
+        uint64_t old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+        demangle_binder(rdm);
+
+        if (eat(rdm, 'U'))
+            PRINT("unsafe ");
+
+        if (eat(rdm, 'K')) {
+            struct rust_mangled_ident abi;
+
+            if (eat(rdm, 'C')) {
+                abi.ascii = "C";
+                abi.ascii_len = 1;
+            } else {
+                abi = parse_ident(rdm);
+                CHECK_OR(abi.ascii && !abi.punycode, goto restore);
+            }
+
+            PRINT("extern \"");
+
+            // If the ABI had any `-`, they were replaced with `_`,
+            // so the parts between `_` have to be re-joined with `-`.
+            for (size_t i = 0; i < abi.ascii_len;) {
+                if (abi.ascii[i] == '_') {
+                    print_str(rdm, abi.ascii, i);
+                    PRINT("-");
+                    abi.ascii += i + 1;
+                    abi.ascii_len -= i + 1;
+                    i = 0;
+                } else {
+                    i++;
+                }
+            }
+            print_str(rdm, abi.ascii, abi.ascii_len);
+
+            PRINT("\" ");
+        }
+
+        PRINT("fn(");
+        for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) {
+            if (i > 0)
+                PRINT(", ");
+            demangle_type(rdm);
+        }
+        PRINT(")");
+
+        if (eat(rdm, 'u')) {
+            // Skip printing the return type if it's 'u', i.e. `()`.
+        } else {
+            PRINT(" -> ");
+            demangle_type(rdm);
+        }
+
+    // Restore `bound_lifetime_depth` to outside the binder.
+    restore:
+        rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+        break;
+    }
+    case 'D':
+        PRINT("dyn ");
+
+        uint64_t old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+        demangle_binder(rdm);
+
+        for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) {
+            if (i > 0)
+                PRINT(" + ");
+            demangle_dyn_trait(rdm);
+        }
+
+        // Restore `bound_lifetime_depth` to outside the binder.
+        rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+
+        CHECK_OR(eat(rdm, 'L'), return );
+        uint64_t lt = parse_integer_62(rdm);
+        if (lt) {
+            PRINT(" + ");
+            print_lifetime_from_index(rdm, lt);
+        }
+        break;
+    case 'B': {
+        size_t backref = parse_integer_62(rdm);
+        if (!rdm->skipping_printing) {
+            size_t old_next = rdm->next;
+            rdm->next = backref;
+            demangle_type(rdm);
+            rdm->next = old_next;
+        }
+        break;
+    }
+    default:
+        // Go back to the tag, so `demangle_path` also sees it.
+        rdm->next--;
+        demangle_path(rdm, false);
+    }
+
+    rdm->recursion_depth--;
+}
+
+/// A trait in a trait object may have some "existential projections"
+/// (i.e. associated type bindings) after it, which should be printed
+/// in the `<...>` of the trait, e.g. `dyn Trait<Assoc = X>`.
+/// To this end, this method will keep the `<...>` of an 'I' path
+/// open, by omitting the `>`, and return `true` in that case.
+static bool demangle_path_maybe_open_generics(struct rust_demangler *rdm) {
+    bool open = false;
+
+    if (!enter_recursion(rdm))
+        return open;
+
+    if (eat(rdm, 'B')) {
+        size_t backref = parse_integer_62(rdm);
+        if (!rdm->skipping_printing) {
+            size_t old_next = rdm->next;
+            rdm->next = backref;
+            open = demangle_path_maybe_open_generics(rdm);
+            rdm->next = old_next;
+        }
+    } else if (eat(rdm, 'I')) {
+        demangle_path(rdm, false);
+        PRINT("<");
+        open = true;
+        for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) {
+            if (i > 0)
+                PRINT(", ");
+            demangle_generic_arg(rdm);
+        }
+    } else
+        demangle_path(rdm, false);
+
+    rdm->recursion_depth--;
+    return open;
+}
+
+static void demangle_dyn_trait(struct rust_demangler *rdm) {
+    CHECK_OR(!rdm->errored, return );
+
+    bool open = demangle_path_maybe_open_generics(rdm);
+
+    while (eat(rdm, 'p')) {
+        if (!open)
+            PRINT("<");
+        else
+            PRINT(", ");
+        open = true;
+
+        struct rust_mangled_ident name = parse_ident(rdm);
+        print_ident(rdm, name);
+        PRINT(" = ");
+        demangle_type(rdm);
+    }
+
+    if (open)
+        PRINT(">");
+}
+
+// Demangle a const; when `in_value` is false, complex values get `{...}`.
+static void demangle_const(struct rust_demangler *rdm, bool in_value) {
+    if (!enter_recursion(rdm))
+        return;
+
+    bool opened_brace = false;
+
+    char ty_tag = next(rdm);
+    switch (ty_tag) {
+    case 'p':
+        PRINT("_");
+        break;
+
+    // Unsigned integer types.
+    case 'h':
+    case 't':
+    case 'm':
+    case 'y':
+    case 'o':
+    case 'j':
+        demangle_const_uint(rdm, ty_tag);
+        break;
+
+    case 'a':
+    case 's':
+    case 'l':
+    case 'x':
+    case 'n':
+    case 'i':
+        if (eat(rdm, 'n')) {
+            PRINT("-");
+        }
+        demangle_const_uint(rdm, ty_tag);
+        break;
+
+    case 'b': {
+        uint64_t value = 0;
+        while (!eat(rdm, '_')) {
+            value <<= 4;
+
+            char c = next(rdm);
+            if (IS_DIGIT(c))
+                value |= c - '0';
+            else if (c >= 'a' && c <= 'f')
+                value |= 10 + (c - 'a');
+            else
+                ERROR_AND(return );
+        }
+
+        if (value == 0) {
+            PRINT("false");
+        } else if (value == 1) {
+            PRINT("true");
+        } else {
+            ERROR_AND(return );
+        }
+        break;
+    }
+
+    case 'c': {
+        uint64_t value = 0;
+        while (!eat(rdm, '_')) {
+            value <<= 4;
+
+            char c = next(rdm);
+            if (IS_DIGIT(c))
+                value |= c - '0';
+            else if (c >= 'a' && c <= 'f')
+                value |= 10 + (c - 'a');
+            else
+                ERROR_AND(return );
+        }
+
+        if (value > 0x10FFFF)
+            ERROR_AND(return );
+
+        if (value >= 0xD800 && value <= 0xDFFF)
+            ERROR_AND(return );
+
+        PRINT("'");
+        print_quoted_escaped_char(rdm, '\'', value);
+        PRINT("'");
+
+        break;
+    }
+
+    case 'e':
+        // NOTE(eddyb) a string literal `"..."` has type `&str`, so
+        // to get back the type `str`, `*"..."` syntax is needed
+        // (even if that may not be valid in Rust itself).
+        if (!in_value) {
+            opened_brace = true;
+            PRINT("{");
+        }
+        PRINT("*");
+
+        demangle_const_str_literal(rdm);
+        break;
+
+    case 'R':
+    case 'Q':
+        if (ty_tag == 'R' && eat(rdm, 'e')) {
+            // NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which
+            // is what `Re..._` would imply (see comment for `str` above).
+            demangle_const_str_literal(rdm);
+            break;
+        }
+
+        if (!in_value) {
+            opened_brace = true;
+            PRINT("{");
+        }
+
+        PRINT("&");
+        if (ty_tag != 'R') {
+            PRINT("mut ");
+        }
+
+        demangle_const(rdm, true);
+        break;
+
+    case 'A': {
+        if (!in_value) {
+            opened_brace = true;
+            PRINT("{");
+        }
+
+        PRINT("[");
+
+        size_t i = 0;
+        while (!eat(rdm, 'E')) {
+            CHECK_OR(!rdm->errored, return );
+
+            if (i > 0)
+                PRINT(", ");
+
+            demangle_const(rdm, true);
+
+            i += 1;
+        }
+
+        PRINT("]");
+        break;
+    }
+
+    case 'T': {
+        if (!in_value) {
+            opened_brace = true;
+            PRINT("{");
+        }
+
+        PRINT("(");
+
+        size_t i = 0;
+        while (!eat(rdm, 'E')) {
+            CHECK_OR(!rdm->errored, return );
+
+            if (i > 0)
+                PRINT(", ");
+
+            demangle_const(rdm, true);
+
+            i += 1;
+        }
+
+        if (i == 1)
+            PRINT(",");
+
+        PRINT(")");
+        break;
+    }
+
+    case 'V':
+        if (!in_value) {
+            opened_brace = true;
+            PRINT("{");
+        }
+
+        demangle_path(rdm, true);
+
+        switch (next(rdm)) {
+        case 'U':
+            break;
+
+        case 'T': {
+            PRINT("(");
+
+            size_t i = 0;
+            while (!eat(rdm, 'E')) {
+                CHECK_OR(!rdm->errored, return );
+
+                if (i > 0)
+                    PRINT(", ");
+
+                demangle_const(rdm, true);
+
+                i += 1;
+            }
+
+            PRINT(")");
+            break;
+        }
+
+        case 'S': {
+            PRINT(" { ");
+
+            size_t i = 0;
+            while (!eat(rdm, 'E')) {
+                CHECK_OR(!rdm->errored, return );
+
+                if (i > 0)
+                    PRINT(", ");
+
+                parse_disambiguator(rdm);
+
+                struct rust_mangled_ident name = parse_ident(rdm);
+                print_ident(rdm, name);
+
+                PRINT(": ");
+
+                demangle_const(rdm, true);
+
+                i += 1;
+            }
+
+            PRINT(" }");
+            break;
+        }
+
+        default:
+            ERROR_AND(return );
+        }
+
+        break;
+
+    case 'B': {
+        size_t backref = parse_integer_62(rdm);
+        if (!rdm->skipping_printing) {
+            size_t old_next = rdm->next;
+            rdm->next = backref;
+            demangle_const(rdm, in_value);
+            rdm->next = old_next;
+        }
+        break;
+}
+
+    default:
+        ERROR_AND(return );
+    }
+
+    if (opened_brace) {
+        PRINT("}");
+    }
+
+    rdm->recursion_depth--;
+}
+
+// Print a hex-encoded integer const: decimal if it fits 64 bits, else `0x...`.
+static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag) {
+    CHECK_OR(!rdm->errored, return );
+
+    uint64_t value = 0;
+    size_t hex_len = 0;
+    while (!eat(rdm, '_')) {
+        value <<= 4;
+
+        char c = next(rdm);
+        if (IS_DIGIT(c))
+            value |= c - '0';
+        else if (c >= 'a' && c <= 'f')
+            value |= 10 + (c - 'a');
+        else
+            ERROR_AND(return );
+        hex_len++;
+    }
+
+    // Print anything that doesn't fit in `uint64_t` verbatim.
+    if (hex_len > 16) {
+        PRINT("0x");
+        print_str(rdm, rdm->sym + (rdm->next - hex_len - 1), hex_len);
+    } else {
+        print_uint64(rdm, value);
+    }
+
+    if (rdm->verbose)
+        PRINT(basic_type(ty_tag));
+}
+
+
+static void demangle_const_str_literal(struct rust_demangler *rdm) {
+    CHECK_OR(!rdm->errored, return );
+
+    PRINT("\"");
+
+    // FIXME(bjorn3) actually decode UTF-8 strings into individual characters
+    while (!eat(rdm, '_')) {
+        uint32_t value = 0;
+
+        char c = next(rdm);
+        if (IS_DIGIT(c))
+            value |= c - '0';
+        else if (c >= 'a' && c <= 'f')
+            value |= 10 + (c - 'a');
+        else
+            ERROR_AND(return );
+
+        value <<= 4;
+
+        c = next(rdm);
+        if (IS_DIGIT(c))
+            value |= c - '0';
+        else if (c >= 'a' && c <= 'f')
+            value |= 10 + (c - 'a');
+        else
+            ERROR_AND(return );
+
+        print_quoted_escaped_char(rdm, '"', value);
+    }
+
+    PRINT("\"");
+}
+
+bool sysprof_rust_demangle_with_callback(
+    const char *mangled, int flags,
+    void (*callback)(const char *data, size_t len, void *opaque), void *opaque
+) {
+    // Rust symbols always start with R, _R or __R.
+    if (mangled[0] == '_' && mangled[1] == 'R')
+        mangled += 2;
+    else if (mangled[0] == 'R')
+        // On Windows, dbghelp strips leading underscores, so we accept "R..."
+        // form too.
+        mangled += 1;
+    else if (mangled[0] == '_' && mangled[1] == '_' && mangled[2] == 'R')
+        // On OSX, symbols are prefixed with an extra _
+        mangled += 3;
+    else
+        return false;
+
+    // Paths always start with uppercase characters.
+    if (!IS_UPPER(mangled[0]))
+        return false;
+
+    struct rust_demangler rdm;
+
+    rdm.sym = mangled;
+    rdm.sym_len = 0;
+
+    rdm.callback_opaque = opaque;
+    rdm.callback = callback;
+
+    rdm.next = 0;
+    rdm.errored = false;
+    rdm.skipping_printing = false;
+    rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0;
+    rdm.version = 0;
+    rdm.bound_lifetime_depth = 0;
+    rdm.recursion_depth = 0;
+
+    // Rust symbols only use ASCII characters.
+    for (const char *p = mangled; *p; p++) {
+        if ((*p & 0x80) != 0)
+            return false;
+
+        if (*p == '.' && strncmp(p, ".llvm.", 6) == 0) {
+            // Ignore .llvm.<hash> suffixes
+            break;
+        }
+
+        rdm.sym_len++;
+    }
+
+    demangle_path(&rdm, true);
+
+    // Skip instantiating crate.
+    if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' && peek(&rdm) <= 'Z') {
+        rdm.skipping_printing = true;
+        demangle_path(&rdm, false);
+    }
+
+    // Print trailing garbage
+    print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
+
+    return !rdm.errored;
+}
+
+// Growable string buffers.
+struct str_buf {
+    char *ptr;
+    size_t len;
+    size_t cap;
+    bool errored;
+};
+
+static void str_buf_reserve(struct str_buf *buf, size_t extra) {
+    // Allocation failed before.
+    if (buf->errored)
+        return;
+
+    size_t available = buf->cap - buf->len;
+
+    if (extra <= available)
+        return;
+
+    size_t min_new_cap = buf->cap + (extra - available);
+
+    // Check for overflows.
+    if (min_new_cap < buf->cap) {
+        buf->errored = true;
+        return;
+    }
+
+    size_t new_cap = buf->cap;
+
+    if (new_cap == 0)
+        new_cap = 4;
+
+    // Double capacity until sufficiently large.
+    while (new_cap < min_new_cap) {
+        new_cap *= 2;
+
+        // Check for overflows.
+        if (new_cap < buf->cap) {
+            buf->errored = true;
+            return;
+        }
+    }
+
+    char *new_ptr = (char *)realloc(buf->ptr, new_cap);
+    if (new_ptr == NULL) {
+        free(buf->ptr);
+        buf->ptr = NULL;
+        buf->len = 0;
+        buf->cap = 0;
+        buf->errored = true;
+    } else {
+        buf->ptr = new_ptr;
+        buf->cap = new_cap;
+    }
+}
+
+static void str_buf_append(struct str_buf *buf, const char *data, size_t len) {
+    str_buf_reserve(buf, len);
+    if (buf->errored)
+        return;
+
+    memcpy(buf->ptr + buf->len, data, len);
+    buf->len += len;
+}
+
+static void
+str_buf_demangle_callback(const char *data, size_t len, void *opaque) {
+    str_buf_append(opaque, data, len);
+}
+
+char *sysprof_rust_demangle(const char *mangled, int flags) {
+    struct str_buf out;
+
+    out.ptr = NULL;
+    out.len = 0;
+    out.cap = 0;
+    out.errored = false;
+
+    bool success = sysprof_rust_demangle_with_callback(
+        mangled, flags, str_buf_demangle_callback, &out
+    );
+
+    if (!success) {
+        free(out.ptr);
+        return NULL;
+    }
+
+    str_buf_append(&out, "\0", 1);
+    if (out.errored) {
+        // An append failed, so the buffer is incomplete or unterminated.
+        free(out.ptr);
+        return NULL;
+    }
+    return out.ptr;
+}
diff --git a/contrib/elfparser/rust-demangle.h b/contrib/elfparser/rust-demangle.h
new file mode 100644
index 00000000..8017b517
--- /dev/null
+++ b/contrib/elfparser/rust-demangle.h
@@ -0,0 +1,29 @@
+/*
+Imported from https://github.com/LykenSol/rust-demangle.c commit eed29f57732ddb2be434ec89f8ede9b695e5e157
+Modifications from upstream:
+* Add sysprof_ prefix to exported symbols and mark them as hidden
+* Add pragma once
+* Use glib begin/end decls
+*/
+
+#pragma once
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#define RUST_DEMANGLE_FLAG_VERBOSE 1
+
+G_GNUC_INTERNAL
+bool sysprof_rust_demangle_with_callback(
+    const char *mangled, int flags,
+    void (*callback)(const char *data, size_t len, void *opaque), void *opaque
+);
+
+G_GNUC_INTERNAL
+char *sysprof_rust_demangle(const char *mangled, int flags);
+
+G_END_DECLS
diff --git a/src/libsysprof/sysprof-elf.c b/src/libsysprof/sysprof-elf.c
index 404860b4..b721ba89 100644
--- a/src/libsysprof/sysprof-elf.c
+++ b/src/libsysprof/sysprof-elf.c
@@ -434,7 +434,7 @@ sysprof_elf_get_symbol_at_address_internal (SysprofElf *self,
 
       name = elf_parser_get_sym_name (self->parser, symbol);
 
-      if (name != NULL && name[0] == '_' && name[1] == 'Z')
+      if (name != NULL && name[0] == '_' && ((name[1] == 'Z') || (name[1] == 'R')))
         ret = elf_demangle (name);
       else
         ret = g_strdup (name);