Update the vendored version of rust-demangle.c

Support for the legacy symbol mangling scheme has been added.
This commit is contained in:
bjorn3
2023-10-06 11:23:01 +02:00
parent aa66b71d66
commit ab57e4314f
2 changed files with 233 additions and 34 deletions

View File

@ -1,5 +1,5 @@
/*
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
Modifications from upstream:
* Add sysprof_ prefix to exported symbols
* Use g_strdup in sysprof_rust_demangle
@ -176,7 +176,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
ident.punycode = NULL;
ident.punycode_len = 0;
bool is_punycode = eat(rdm, 'u');
bool is_punycode = false;
if (rdm->version != -1) {
is_punycode = eat(rdm, 'u');
}
char c = next(rdm);
CHECK_OR(IS_DIGIT(c), return ident);
@ -186,8 +189,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
while (IS_DIGIT(peek(rdm)))
len = len * 10 + (next(rdm) - '0');
// Skip past the optional `_` separator.
eat(rdm, '_');
if (rdm->version != -1) {
// Skip past the optional `_` separator.
eat(rdm, '_');
}
size_t start = rdm->next;
rdm->next += len;
@ -1157,30 +1162,173 @@ static void demangle_const_str_literal(struct rust_demangler *rdm) {
PRINT("\"");
}
// Report whether `name` has the shape of the hash component that ends a
// legacy-mangled path: the letter 'h' followed only by lowercase hexadecimal
// digits (`0-9`, `a-f`).
//
// An identifier with no backing text (NULL pointer or zero length) is never
// a hash. This matters because callers may hand over the result of a failed
// identifier parse without checking it first.
static bool is_rust_hash(struct rust_mangled_ident name) {
    if (name.ascii == NULL || name.ascii_len == 0 || name.ascii[0] != 'h') {
        return false;
    }

    // Everything after the leading 'h' must be a lowercase hex digit.
    for (size_t i = 1; i < name.ascii_len; i++) {
        char ch = name.ascii[i];
        bool is_lower_hex = IS_DIGIT(ch) || (ch >= 'a' && ch <= 'f');
        if (!is_lower_hex) {
            return false;
        }
    }

    return true;
}
// Print one identifier of a legacy-mangled path, undoing the escapes used by
// the legacy scheme:
//
//   * a leading `_` that only exists to protect a following `$` is dropped;
//   * `..` becomes `::`, a single `.` is printed as-is;
//   * `$SP$ $BP$ $RF$ $LT$ $GT$ $LP$ $RP$ $C$` become `@ * & < > ( ) ,`;
//   * `$u<hex>$` becomes the character with that code point: printable ASCII
//     is emitted directly, anything else as `\u{<hex>}`.
//
// Escape names must match exactly. As soon as something that is not a
// well-formed escape is met, decoding stops and the remainder of the
// identifier is printed verbatim.
//
// Nothing is printed when the demangler is already in the errored state or
// is skipping output. Identifiers carrying punycode are rejected through
// CHECK_OR.
static void print_legacy_ident(
    struct rust_demangler *rdm, struct rust_mangled_ident ident
) {
    // Named escapes and their replacement text.
    static const struct {
        const char *name;
        const char *text;
    } escapes[] = {
        {"SP", "@"}, {"BP", "*"}, {"RF", "&"}, {"LT", "<"},
        {"GT", ">"}, {"LP", "("}, {"RP", ")"}, {"C", ","},
    };

    if (rdm->errored || rdm->skipping_printing)
        return;

    CHECK_OR(!ident.punycode, return);

    // Only look at the first two bytes when the identifier really has them;
    // otherwise the length below would wrap around.
    if (ident.ascii_len >= 2 && ident.ascii[0] == '_' &&
        ident.ascii[1] == '$') {
        ident.ascii += 1;
        ident.ascii_len -= 1;
    }

    while (ident.ascii_len > 0) {
        if (ident.ascii[0] == '.') {
            if (ident.ascii_len >= 2 && ident.ascii[1] == '.') {
                PRINT("::");
                ident.ascii += 2;
                ident.ascii_len -= 2;
            } else {
                PRINT(".");
                ident.ascii += 1;
                ident.ascii_len -= 1;
            }
        } else if (ident.ascii[0] == '$') {
            // An escape runs up to the next `$` inside this identifier.
            const char *end_ptr =
                memchr(ident.ascii + 1, '$', ident.ascii_len - 1);
            if (!end_ptr)
                break;

            const char *escape = ident.ascii + 1;
            size_t escape_len = (size_t)(end_ptr - escape);

            // Exact-length comparison, so `$SPfoo$` is not taken for `$SP$`.
            const char *text = NULL;
            for (size_t i = 0; i < sizeof escapes / sizeof escapes[0]; i++) {
                if (strlen(escapes[i].name) == escape_len &&
                    memcmp(escape, escapes[i].name, escape_len) == 0) {
                    text = escapes[i].text;
                    break;
                }
            }

            if (text) {
                print_str(rdm, text, strlen(text));
            } else {
                // `$u<hex>$` needs the `u` and at least one digit.
                if (escape_len < 2 || escape[0] != 'u')
                    break;

                uint32_t c = 0;
                bool invalid = false;
                for (size_t i = 1; i < escape_len; i++) {
                    char digit = escape[i];
                    if (!IS_DIGIT(digit) && !(digit >= 'a' && digit <= 'f')) {
                        invalid = true;
                        break;
                    }
                    c = (c << 4) | decode_hex_nibble(digit);
                    // Bail out before the accumulator can wrap; leading
                    // zeros are still fine since they keep `c` small.
                    if (c > 0x10ffff) {
                        invalid = true;
                        break;
                    }
                }
                // Surrogates are not Unicode scalar values.
                if (invalid || (c >= 0xd800 && c <= 0xdfff))
                    break;

                if (c >= 0x20 && c <= 0x7e) {
                    // Printable ASCII
                    char v = (char)c;
                    print_str(rdm, &v, 1);
                } else {
                    // FIXME show printable unicode characters without hex
                    // encoding
                    char s[9] = {0};
                    snprintf(s, sizeof s, "%" PRIx32, c);
                    PRINT("\\u{");
                    PRINT(s);
                    PRINT("}");
                }
            }

            // Step over `$`, the escape body and the closing `$`.
            ident.ascii += escape_len + 2;
            ident.ascii_len -= escape_len + 2;
        } else {
            // Plain text: emit everything up to the next `$` or `.`.
            size_t run = 0;
            while (run < ident.ascii_len && ident.ascii[run] != '$' &&
                   ident.ascii[run] != '.') {
                run++;
            }
            if (run == ident.ascii_len)
                break; // No more special characters; flush below.

            print_str(rdm, ident.ascii, run);
            ident.ascii += run;
            ident.ascii_len -= run;
        }
    }

    // Whatever is left (possibly nothing) is printed untouched.
    print_str(rdm, ident.ascii, ident.ascii_len);
}
// Demangle the body of a legacy-mangled symbol: a sequence of
// length-prefixed identifiers terminated by `E`. Identifiers are printed
// separated by `::`.
//
// Unless verbose mode is on, an identifier that is immediately followed by
// the terminating `E` and looks like a hash (see is_rust_hash) is consumed
// together with the `E` but not printed.
//
// Returns as soon as the demangler enters the errored state.
static void demangle_legacy_path(struct rust_demangler *rdm) {
    bool first = true;

    while (1) {
        if (eat(rdm, 'E')) {
            // FIXME Maybe check if at end of symbol?
            return;
        }

        struct rust_mangled_ident name = parse_ident(rdm);
        // A failed parse does not yield a usable identifier, so stop here
        // instead of inspecting its (possibly unset) text below.
        if (rdm->errored)
            return;

        if (!rdm->verbose && peek(rdm) == 'E' && is_rust_hash(name)) {
            // Skip printing the hash if verbose mode is disabled.
            eat(rdm, 'E');
            return;
        }

        if (!first) {
            PRINT("::");
        }
        first = false;

        print_legacy_ident(rdm, name);
        CHECK_OR(!rdm->errored, return);
    }
}
bool sysprof_rust_demangle_with_callback(
const char *mangled, int flags,
const char *whole_mangled_symbol, int flags,
void (*callback)(const char *data, size_t len, void *opaque), void *opaque
) {
// Rust symbols always start with R, _R or __R.
if (mangled[0] == '_' && mangled[1] == 'R')
mangled += 2;
else if (mangled[0] == 'R')
// On Windows, dbghelp strips leading underscores, so we accept "R..."
// form too.
mangled += 1;
else if (mangled[0] == '_' && mangled[1] == '_' && mangled[2] == 'R')
// On OSX, symbols are prefixed with an extra _
mangled += 3;
else
return false;
// Paths always start with uppercase characters.
if (!IS_UPPER(mangled[0]))
return false;
struct rust_demangler rdm;
rdm.sym = mangled;
rdm.sym = whole_mangled_symbol;
rdm.sym_len = 0;
rdm.callback_opaque = opaque;
@ -1190,11 +1338,47 @@ bool sysprof_rust_demangle_with_callback(
rdm.errored = false;
rdm.skipping_printing = false;
rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0;
rdm.version = 0;
rdm.version = -2; // Invalid version
rdm.bound_lifetime_depth = 0;
// Rust symbols always start with R, _R or __R for the v0 scheme or ZN, _ZN
// or __ZN for the legacy scheme.
if (strncmp(rdm.sym, "_R", 2) == 0) {
rdm.sym += 2;
rdm.version = 0; // v0
} else if (rdm.sym[0] == 'R') {
// On Windows, dbghelp strips leading underscores, so we accept "R..."
// form too.
rdm.sym += 1;
rdm.version = 0; // v0
} else if (strncmp(rdm.sym, "__R", 3) == 0) {
// On OSX, symbols are prefixed with an extra _
rdm.sym += 3;
rdm.version = 0; // v0
} else if (strncmp(rdm.sym, "_ZN", 3) == 0) {
rdm.sym += 3;
rdm.version = -1; // legacy
} else if (strncmp(rdm.sym, "ZN", 2) == 0) {
// On Windows, dbghelp strips leading underscores, so we accept "ZN..."
// form too.
rdm.sym += 2;
rdm.version = -1; // legacy
} else if (strncmp(rdm.sym, "__ZN", 4) == 0) {
// On OSX, symbols are prefixed with an extra _
rdm.sym += 4;
rdm.version = -1; // legacy
} else {
return false;
}
if (rdm.version != -1) {
// Paths always start with uppercase characters.
if (!IS_UPPER(rdm.sym[0]))
return false;
}
// Rust symbols only use ASCII characters.
for (const char *p = mangled; *p; p++) {
for (const char *p = rdm.sym; *p; p++) {
if ((*p & 0x80) != 0)
return false;
@ -1206,17 +1390,32 @@ bool sysprof_rust_demangle_with_callback(
rdm.sym_len++;
}
demangle_path(&rdm, true);
if (rdm.version == -1) {
demangle_legacy_path(&rdm);
} else {
demangle_path(&rdm, true);
// Skip instantiating crate.
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
peek(&rdm) <= 'Z') {
rdm.skipping_printing = true;
demangle_path(&rdm, false);
// Skip instantiating crate.
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
peek(&rdm) <= 'Z') {
rdm.skipping_printing = true;
demangle_path(&rdm, false);
}
}
// Print trailing garbage
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
if (!rdm.errored && (rdm.sym_len - rdm.next > 0)) {
for (const char *p = rdm.sym + rdm.next; *p; p++) {
// FIXME match is_symbol_like from rustc-demangle
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
(*p >= '0' && *p <= '9') || *p == '.')) {
// Suffix is not a symbol like string
return false;
}
}
// Print LLVM produced suffix
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
}
return !rdm.errored;
}

View File

@ -1,5 +1,5 @@
/*
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
Modifications from upstream:
* Add sysprof_ prefix to exported symbols and mark them as hidden
* Add pragma once