mirror of
https://github.com/varun-r-mallya/sysprof.git
synced 2025-12-31 20:36:25 +00:00
Update the vendored version of rust-demangle.c
Support for the legacy symbol mangling scheme has been added.
This commit is contained in:
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
|
||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
|
||||
Modifications from upstream:
|
||||
* Add sysprof_ prefix to exported symbols
|
||||
* Use g_strdup in sysprof_rust_demangle
|
||||
@ -176,7 +176,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
|
||||
ident.punycode = NULL;
|
||||
ident.punycode_len = 0;
|
||||
|
||||
bool is_punycode = eat(rdm, 'u');
|
||||
bool is_punycode = false;
|
||||
if (rdm->version != -1) {
|
||||
is_punycode = eat(rdm, 'u');
|
||||
}
|
||||
|
||||
char c = next(rdm);
|
||||
CHECK_OR(IS_DIGIT(c), return ident);
|
||||
@ -186,8 +189,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
|
||||
while (IS_DIGIT(peek(rdm)))
|
||||
len = len * 10 + (next(rdm) - '0');
|
||||
|
||||
// Skip past the optional `_` separator.
|
||||
eat(rdm, '_');
|
||||
if (rdm->version != -1) {
|
||||
// Skip past the optional `_` separator.
|
||||
eat(rdm, '_');
|
||||
}
|
||||
|
||||
size_t start = rdm->next;
|
||||
rdm->next += len;
|
||||
@ -1157,30 +1162,173 @@ static void demangle_const_str_literal(struct rust_demangler *rdm) {
|
||||
PRINT("\"");
|
||||
}
|
||||
|
||||
/*
 * Report whether `name` looks like the hash component of a legacy-mangled
 * path: the letter 'h' followed only by lowercase hexadecimal digits
 * (0-9, a-f). The number of digits is not checked.
 *
 * Returns false when the identifier carries no ASCII bytes (NULL pointer or
 * zero length), so the function is safe to call on an identifier that was
 * returned from a failed parse.
 */
static bool is_rust_hash(struct rust_mangled_ident name) {
    // Nothing to inspect: do not touch name.ascii[0] on an empty identifier.
    if (!name.ascii || name.ascii_len == 0) {
        return false;
    }

    if (name.ascii[0] != 'h') {
        return false;
    }

    for (size_t i = 1; i < name.ascii_len; i++) {
        char ch = name.ascii[i];
        if (!IS_DIGIT(ch) && !(ch >= 'a' && ch <= 'f')) {
            return false;
        }
    }

    return true;
}
|
||||
|
||||
static void print_legacy_ident(
|
||||
struct rust_demangler *rdm, struct rust_mangled_ident ident
|
||||
) {
|
||||
if (rdm->errored || rdm->skipping_printing)
|
||||
return;
|
||||
|
||||
CHECK_OR(!ident.punycode, return);
|
||||
|
||||
if (ident.ascii[0] == '_' && ident.ascii[1] == '$') {
|
||||
ident.ascii += 1;
|
||||
ident.ascii_len -= 1;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
if (ident.ascii_len == 0) {
|
||||
break;
|
||||
} else if (ident.ascii[0] == '.') {
|
||||
if (ident.ascii_len >= 2 && ident.ascii[1] == '.') {
|
||||
PRINT("::");
|
||||
ident.ascii += 2;
|
||||
ident.ascii_len -= 2;
|
||||
} else {
|
||||
PRINT(".");
|
||||
ident.ascii += 1;
|
||||
ident.ascii_len -= 1;
|
||||
}
|
||||
} else if (ident.ascii[0] == '$') {
|
||||
const char *end_ptr =
|
||||
(const char *)memchr(&ident.ascii[1], '$', ident.ascii_len - 1);
|
||||
if (!end_ptr)
|
||||
break;
|
||||
const char *escape = &ident.ascii[1];
|
||||
size_t escape_len = end_ptr - escape;
|
||||
|
||||
if (strncmp(escape, "SP", 2) == 0) {
|
||||
PRINT("@");
|
||||
} else if (strncmp(escape, "BP", 2) == 0) {
|
||||
PRINT("*");
|
||||
} else if (strncmp(escape, "RF", 2) == 0) {
|
||||
PRINT("&");
|
||||
} else if (strncmp(escape, "LT", 2) == 0) {
|
||||
PRINT("<");
|
||||
} else if (strncmp(escape, "GT", 2) == 0) {
|
||||
PRINT(">");
|
||||
} else if (strncmp(escape, "LP", 2) == 0) {
|
||||
PRINT("(");
|
||||
} else if (strncmp(escape, "RP", 2) == 0) {
|
||||
PRINT(")");
|
||||
} else if (strncmp(escape, "C", 1) == 0) {
|
||||
PRINT(",");
|
||||
} else {
|
||||
if (escape[0] != 'u') {
|
||||
break;
|
||||
}
|
||||
|
||||
const char *digits = &escape[1];
|
||||
size_t digits_len = escape_len - 1;
|
||||
|
||||
bool invalid = false;
|
||||
for (size_t i = 1; i < digits_len; i++) {
|
||||
if (!IS_DIGIT(digits[i]) &&
|
||||
!(digits[i] >= 'a' && digits[i] <= 'f')) {
|
||||
invalid = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (invalid)
|
||||
break;
|
||||
|
||||
struct hex_nibbles hex;
|
||||
|
||||
hex.nibbles = digits;
|
||||
hex.nibbles_len = digits_len;
|
||||
|
||||
uint32_t c = 0;
|
||||
for (size_t i = 0; i < hex.nibbles_len; i++)
|
||||
c = (c << 4) | decode_hex_nibble(hex.nibbles[i]);
|
||||
|
||||
if (!(c < 0xd800 || (c > 0xdfff && c < 0x10ffff))) {
|
||||
break; // Not a valid unicode scalar
|
||||
}
|
||||
|
||||
if (c >= 0x20 && c <= 0x7e) {
|
||||
// Printable ASCII
|
||||
char v = (char)c;
|
||||
print_str(rdm, &v, 1);
|
||||
} else {
|
||||
// FIXME show printable unicode characters without hex
|
||||
// encoding
|
||||
PRINT("\\u{");
|
||||
char s[9] = {0};
|
||||
sprintf(s, "%" PRIx32, c);
|
||||
PRINT(s);
|
||||
PRINT("}");
|
||||
}
|
||||
}
|
||||
|
||||
ident.ascii += escape_len + 2;
|
||||
ident.ascii_len -= escape_len + 2;
|
||||
} else {
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < ident.ascii_len; i++) {
|
||||
if (ident.ascii[i] == '$' || ident.ascii[i] == '.') {
|
||||
print_str(rdm, ident.ascii, i);
|
||||
ident.ascii += i;
|
||||
ident.ascii_len -= i;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
print_str(rdm, ident.ascii, ident.ascii_len);
|
||||
}
|
||||
|
||||
static void demangle_legacy_path(struct rust_demangler *rdm) {
|
||||
bool first = true;
|
||||
|
||||
while (1) {
|
||||
if (eat(rdm, 'E')) {
|
||||
// FIXME Maybe check if at end of symbol?
|
||||
return;
|
||||
}
|
||||
|
||||
struct rust_mangled_ident name = parse_ident(rdm);
|
||||
|
||||
if (!rdm->verbose && peek(rdm) == 'E' && is_rust_hash(name)) {
|
||||
// Skip printing the hash if verbose mode is disabled.
|
||||
eat(rdm, 'E');
|
||||
break;
|
||||
}
|
||||
|
||||
if (!first) {
|
||||
PRINT("::");
|
||||
}
|
||||
first = false;
|
||||
|
||||
print_legacy_ident(rdm, name);
|
||||
|
||||
CHECK_OR(!rdm->errored, return);
|
||||
}
|
||||
}
|
||||
|
||||
bool sysprof_rust_demangle_with_callback(
|
||||
const char *mangled, int flags,
|
||||
const char *whole_mangled_symbol, int flags,
|
||||
void (*callback)(const char *data, size_t len, void *opaque), void *opaque
|
||||
) {
|
||||
// Rust symbols always start with R, _R or __R.
|
||||
if (mangled[0] == '_' && mangled[1] == 'R')
|
||||
mangled += 2;
|
||||
else if (mangled[0] == 'R')
|
||||
// On Windows, dbghelp strips leading underscores, so we accept "R..."
|
||||
// form too.
|
||||
mangled += 1;
|
||||
else if (mangled[0] == '_' && mangled[1] == '_' && mangled[2] == 'R')
|
||||
// On OSX, symbols are prefixed with an extra _
|
||||
mangled += 3;
|
||||
else
|
||||
return false;
|
||||
|
||||
// Paths always start with uppercase characters.
|
||||
if (!IS_UPPER(mangled[0]))
|
||||
return false;
|
||||
|
||||
struct rust_demangler rdm;
|
||||
|
||||
rdm.sym = mangled;
|
||||
rdm.sym = whole_mangled_symbol;
|
||||
rdm.sym_len = 0;
|
||||
|
||||
rdm.callback_opaque = opaque;
|
||||
@ -1190,11 +1338,47 @@ bool sysprof_rust_demangle_with_callback(
|
||||
rdm.errored = false;
|
||||
rdm.skipping_printing = false;
|
||||
rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0;
|
||||
rdm.version = 0;
|
||||
rdm.version = -2; // Invalid version
|
||||
rdm.bound_lifetime_depth = 0;
|
||||
|
||||
// Rust symbols always start with R, _R or __R for the v0 scheme or ZN, _ZN
|
||||
// or __ZN for the legacy scheme.
|
||||
if (strncmp(rdm.sym, "_R", 2) == 0) {
|
||||
rdm.sym += 2;
|
||||
rdm.version = 0; // v0
|
||||
} else if (rdm.sym[0] == 'R') {
|
||||
// On Windows, dbghelp strips leading underscores, so we accept "R..."
|
||||
// form too.
|
||||
rdm.sym += 1;
|
||||
rdm.version = 0; // v0
|
||||
} else if (strncmp(rdm.sym, "__R", 3) == 0) {
|
||||
// On OSX, symbols are prefixed with an extra _
|
||||
rdm.sym += 3;
|
||||
rdm.version = 0; // v0
|
||||
} else if (strncmp(rdm.sym, "_ZN", 3) == 0) {
|
||||
rdm.sym += 3;
|
||||
rdm.version = -1; // legacy
|
||||
} else if (strncmp(rdm.sym, "ZN", 2) == 0) {
|
||||
// On Windows, dbghelp strips leading underscores, so we accept "ZN..."
|
||||
// form too.
|
||||
rdm.sym += 2;
|
||||
rdm.version = -1; // legacy
|
||||
} else if (strncmp(rdm.sym, "__ZN", 4) == 0) {
|
||||
// On OSX, symbols are prefixed with an extra _
|
||||
rdm.sym += 4;
|
||||
rdm.version = -1; // legacy
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (rdm.version != -1) {
|
||||
// Paths always start with uppercase characters.
|
||||
if (!IS_UPPER(rdm.sym[0]))
|
||||
return false;
|
||||
}
|
||||
|
||||
// Rust symbols only use ASCII characters.
|
||||
for (const char *p = mangled; *p; p++) {
|
||||
for (const char *p = rdm.sym; *p; p++) {
|
||||
if ((*p & 0x80) != 0)
|
||||
return false;
|
||||
|
||||
@ -1206,17 +1390,32 @@ bool sysprof_rust_demangle_with_callback(
|
||||
rdm.sym_len++;
|
||||
}
|
||||
|
||||
demangle_path(&rdm, true);
|
||||
if (rdm.version == -1) {
|
||||
demangle_legacy_path(&rdm);
|
||||
} else {
|
||||
demangle_path(&rdm, true);
|
||||
|
||||
// Skip instantiating crate.
|
||||
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
|
||||
peek(&rdm) <= 'Z') {
|
||||
rdm.skipping_printing = true;
|
||||
demangle_path(&rdm, false);
|
||||
// Skip instantiating crate.
|
||||
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
|
||||
peek(&rdm) <= 'Z') {
|
||||
rdm.skipping_printing = true;
|
||||
demangle_path(&rdm, false);
|
||||
}
|
||||
}
|
||||
|
||||
// Print trailing garbage
|
||||
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
|
||||
if (!rdm.errored && (rdm.sym_len - rdm.next > 0)) {
|
||||
for (const char *p = rdm.sym + rdm.next; *p; p++) {
|
||||
// FIXME match is_symbol_like from rustc-demangle
|
||||
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
||||
(*p >= '0' && *p <= '9') || *p == '.')) {
|
||||
// Suffix is not a symbol like string
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Print LLVM produced suffix
|
||||
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
|
||||
}
|
||||
|
||||
return !rdm.errored;
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
/*
|
||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
|
||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
|
||||
Modifications from upstream:
|
||||
* Add sysprof_ prefix to exported symbols and mark them as hidden
|
||||
* Add pragma once
|
||||
|
||||
Reference in New Issue
Block a user