diff --git a/contrib/elfparser/demangle.cpp b/contrib/elfparser/demangle.cpp index 6239b52f..1b2852dd 100644 --- a/contrib/elfparser/demangle.cpp +++ b/contrib/elfparser/demangle.cpp @@ -27,8 +27,6 @@ sysprof_cplus_demangle (const char *name) char *real_name; char *ret; int status; - guint i; - guint j; if (name == NULL) return NULL; @@ -38,35 +36,7 @@ sysprof_cplus_demangle (const char *name) if (real_name == NULL) return NULL; - /* We need to return a string that is guaranteed it can be freed with - * g_free() rather than free(), so we might as well look for Legacy - * Rust mangling like '$LT$' and '$GT$' while we're at it. - */ - - ret = (char *)g_malloc (strlen (real_name) + 1); - - for (i = 0, j = 0; real_name[i]; i++) - { - if (real_name[i] == '$') - { - if (real_name[i+1] == 'L' && real_name[i+2] == 'T' && real_name[i+3] == '$') - ret[j++] = '<', i += 3; - else if (real_name[i+1] == 'G' && real_name[i+2] == 'T' && real_name[i+3] == '$') - ret[j++] = '>', i += 3; - } - else if (real_name[i] == '.' 
&& real_name[i+1] == '.') - { - ret[j++] = ':', i++; - ret[j++] = ':'; - } - else - { - ret[j++] = real_name[i]; - } - } - - ret[j] = 0; - + ret = g_strdup (real_name); free (real_name); return ret; diff --git a/contrib/elfparser/rust-demangle.c b/contrib/elfparser/rust-demangle.c index 14c5db3c..0f2bd75b 100644 --- a/contrib/elfparser/rust-demangle.c +++ b/contrib/elfparser/rust-demangle.c @@ -1,5 +1,5 @@ /* -Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f +Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3 Modifications from upstream: * Add sysprof_ prefix to exported symbols * Use g_strdup in sysprof_rust_demangle @@ -176,7 +176,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) { ident.punycode = NULL; ident.punycode_len = 0; - bool is_punycode = eat(rdm, 'u'); + bool is_punycode = false; + if (rdm->version != -1) { + is_punycode = eat(rdm, 'u'); + } char c = next(rdm); CHECK_OR(IS_DIGIT(c), return ident); @@ -186,8 +189,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) { while (IS_DIGIT(peek(rdm))) len = len * 10 + (next(rdm) - '0'); - // Skip past the optional `_` separator. - eat(rdm, '_'); + if (rdm->version != -1) { + // Skip past the optional `_` separator. 
+ eat(rdm, '_'); + } size_t start = rdm->next; rdm->next += len; @@ -1157,30 +1162,173 @@ static void demangle_const_str_literal(struct rust_demangler *rdm) { PRINT("\""); } +static bool is_rust_hash(struct rust_mangled_ident name) { + if (name.ascii[0] != 'h') { + return false; + } + for (size_t i = 1; i < name.ascii_len; i++) { + if (!IS_DIGIT(name.ascii[i]) && + !(name.ascii[i] >= 'a' && name.ascii[i] <= 'f')) { + return false; + } + } + return true; +} + +static void print_legacy_ident( + struct rust_demangler *rdm, struct rust_mangled_ident ident +) { + if (rdm->errored || rdm->skipping_printing) + return; + + CHECK_OR(!ident.punycode, return); + + if (ident.ascii[0] == '_' && ident.ascii[1] == '$') { + ident.ascii += 1; + ident.ascii_len -= 1; + } + + while (1) { + if (ident.ascii_len == 0) { + break; + } else if (ident.ascii[0] == '.') { + if (ident.ascii_len >= 2 && ident.ascii[1] == '.') { + PRINT("::"); + ident.ascii += 2; + ident.ascii_len -= 2; + } else { + PRINT("."); + ident.ascii += 1; + ident.ascii_len -= 1; + } + } else if (ident.ascii[0] == '$') { + const char *end_ptr = + (const char *)memchr(&ident.ascii[1], '$', ident.ascii_len - 1); + if (!end_ptr) + break; + const char *escape = &ident.ascii[1]; + size_t escape_len = end_ptr - escape; + + if (strncmp(escape, "SP", 2) == 0) { + PRINT("@"); + } else if (strncmp(escape, "BP", 2) == 0) { + PRINT("*"); + } else if (strncmp(escape, "RF", 2) == 0) { + PRINT("&"); + } else if (strncmp(escape, "LT", 2) == 0) { + PRINT("<"); + } else if (strncmp(escape, "GT", 2) == 0) { + PRINT(">"); + } else if (strncmp(escape, "LP", 2) == 0) { + PRINT("("); + } else if (strncmp(escape, "RP", 2) == 0) { + PRINT(")"); + } else if (strncmp(escape, "C", 1) == 0) { + PRINT(","); + } else { + if (escape[0] != 'u') { + break; + } + + const char *digits = &escape[1]; + size_t digits_len = escape_len - 1; + + bool invalid = false; + for (size_t i = 1; i < digits_len; i++) { + if (!IS_DIGIT(digits[i]) && + !(digits[i] >= 'a' 
&& digits[i] <= 'f')) { + invalid = true; + break; + } + } + if (invalid) + break; + + struct hex_nibbles hex; + + hex.nibbles = digits; + hex.nibbles_len = digits_len; + + uint32_t c = 0; + for (size_t i = 0; i < hex.nibbles_len; i++) + c = (c << 4) | decode_hex_nibble(hex.nibbles[i]); + + if (!(c < 0xd800 || (c > 0xdfff && c < 0x10ffff))) { + break; // Not a valid unicode scalar + } + + if (c >= 0x20 && c <= 0x7e) { + // Printable ASCII + char v = (char)c; + print_str(rdm, &v, 1); + } else { + // FIXME show printable unicode characters without hex + // encoding + PRINT("\\u{"); + char s[9] = {0}; + sprintf(s, "%" PRIx32, c); + PRINT(s); + PRINT("}"); + } + } + + ident.ascii += escape_len + 2; + ident.ascii_len -= escape_len + 2; + } else { + bool found = false; + for (size_t i = 0; i < ident.ascii_len; i++) { + if (ident.ascii[i] == '$' || ident.ascii[i] == '.') { + print_str(rdm, ident.ascii, i); + ident.ascii += i; + ident.ascii_len -= i; + found = true; + break; + } + } + if (!found) { + break; + } + } + } + + print_str(rdm, ident.ascii, ident.ascii_len); +} + +static void demangle_legacy_path(struct rust_demangler *rdm) { + bool first = true; + + while (1) { + if (eat(rdm, 'E')) { + // FIXME Maybe check if at end of symbol? + return; + } + + struct rust_mangled_ident name = parse_ident(rdm); + + if (!rdm->verbose && peek(rdm) == 'E' && is_rust_hash(name)) { + // Skip printing the hash if verbose mode is disabled. + eat(rdm, 'E'); + break; + } + + if (!first) { + PRINT("::"); + } + first = false; + + print_legacy_ident(rdm, name); + + CHECK_OR(!rdm->errored, return); + } +} + bool sysprof_rust_demangle_with_callback( - const char *mangled, int flags, + const char *whole_mangled_symbol, int flags, void (*callback)(const char *data, size_t len, void *opaque), void *opaque ) { - // Rust symbols always start with R, _R or __R. 
- if (mangled[0] == '_' && mangled[1] == 'R') - mangled += 2; - else if (mangled[0] == 'R') - // On Windows, dbghelp strips leading underscores, so we accept "R..." - // form too. - mangled += 1; - else if (mangled[0] == '_' && mangled[1] == '_' && mangled[2] == 'R') - // On OSX, symbols are prefixed with an extra _ - mangled += 3; - else - return false; - - // Paths always start with uppercase characters. - if (!IS_UPPER(mangled[0])) - return false; - struct rust_demangler rdm; - rdm.sym = mangled; + rdm.sym = whole_mangled_symbol; rdm.sym_len = 0; rdm.callback_opaque = opaque; @@ -1190,11 +1338,47 @@ bool sysprof_rust_demangle_with_callback( rdm.errored = false; rdm.skipping_printing = false; rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0; - rdm.version = 0; + rdm.version = -2; // Invalid version rdm.bound_lifetime_depth = 0; + // Rust symbols always start with R, _R or __R for the v0 scheme or ZN, _ZN + // or __ZN for the legacy scheme. + if (strncmp(rdm.sym, "_R", 2) == 0) { + rdm.sym += 2; + rdm.version = 0; // v0 + } else if (rdm.sym[0] == 'R') { + // On Windows, dbghelp strips leading underscores, so we accept "R..." + // form too. + rdm.sym += 1; + rdm.version = 0; // v0 + } else if (strncmp(rdm.sym, "__R", 3) == 0) { + // On OSX, symbols are prefixed with an extra _ + rdm.sym += 3; + rdm.version = 0; // v0 + } else if (strncmp(rdm.sym, "_ZN", 3) == 0) { + rdm.sym += 3; + rdm.version = -1; // legacy + } else if (strncmp(rdm.sym, "ZN", 2) == 0) { + // On Windows, dbghelp strips leading underscores, so we accept "ZN..." + // form too. + rdm.sym += 2; + rdm.version = -1; // legacy + } else if (strncmp(rdm.sym, "__ZN", 4) == 0) { + // On OSX, symbols are prefixed with an extra _ + rdm.sym += 4; + rdm.version = -1; // legacy + } else { + return false; + } + + if (rdm.version != -1) { + // Paths always start with uppercase characters. + if (!IS_UPPER(rdm.sym[0])) + return false; + } + // Rust symbols only use ASCII characters. 
- for (const char *p = mangled; *p; p++) { + for (const char *p = rdm.sym; *p; p++) { if ((*p & 0x80) != 0) return false; @@ -1206,17 +1390,32 @@ bool sysprof_rust_demangle_with_callback( rdm.sym_len++; } - demangle_path(&rdm, true); + if (rdm.version == -1) { + demangle_legacy_path(&rdm); + } else { + demangle_path(&rdm, true); - // Skip instantiating crate. - if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' && - peek(&rdm) <= 'Z') { - rdm.skipping_printing = true; - demangle_path(&rdm, false); + // Skip instantiating crate. + if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' && + peek(&rdm) <= 'Z') { + rdm.skipping_printing = true; + demangle_path(&rdm, false); + } } - // Print trailing garbage - print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next); + if (!rdm.errored && (rdm.sym_len - rdm.next > 0)) { + for (const char *p = rdm.sym + rdm.next; *p; p++) { + // FIXME match is_symbol_like from rustc-demangle + if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || + (*p >= '0' && *p <= '9') || *p == '.')) { + // Suffix is not a symbol like string + return false; + } + } + + // Print LLVM produced suffix + print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next); + } return !rdm.errored; } diff --git a/contrib/elfparser/rust-demangle.h b/contrib/elfparser/rust-demangle.h index 7b7d013c..c40df3b0 100644 --- a/contrib/elfparser/rust-demangle.h +++ b/contrib/elfparser/rust-demangle.h @@ -1,5 +1,5 @@ /* -Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f +Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3 Modifications from upstream: * Add sysprof_ prefix to exported symbols and mark them as hidden * Add pragma once