mirror of
https://github.com/varun-r-mallya/sysprof.git
synced 2026-02-11 07:30:54 +00:00
Update the vendored version of rust-demangle.c
Support for the legacy symbol mangling scheme has been added.
This commit is contained in:
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
|
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
|
||||||
Modifications from upstream:
|
Modifications from upstream:
|
||||||
* Add sysprof_ prefix to exported symbols
|
* Add sysprof_ prefix to exported symbols
|
||||||
* Use g_strdup in sysprof_rust_demangle
|
* Use g_strdup in sysprof_rust_demangle
|
||||||
@ -176,7 +176,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
|
|||||||
ident.punycode = NULL;
|
ident.punycode = NULL;
|
||||||
ident.punycode_len = 0;
|
ident.punycode_len = 0;
|
||||||
|
|
||||||
bool is_punycode = eat(rdm, 'u');
|
bool is_punycode = false;
|
||||||
|
if (rdm->version != -1) {
|
||||||
|
is_punycode = eat(rdm, 'u');
|
||||||
|
}
|
||||||
|
|
||||||
char c = next(rdm);
|
char c = next(rdm);
|
||||||
CHECK_OR(IS_DIGIT(c), return ident);
|
CHECK_OR(IS_DIGIT(c), return ident);
|
||||||
@ -186,8 +189,10 @@ static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) {
|
|||||||
while (IS_DIGIT(peek(rdm)))
|
while (IS_DIGIT(peek(rdm)))
|
||||||
len = len * 10 + (next(rdm) - '0');
|
len = len * 10 + (next(rdm) - '0');
|
||||||
|
|
||||||
// Skip past the optional `_` separator.
|
if (rdm->version != -1) {
|
||||||
eat(rdm, '_');
|
// Skip past the optional `_` separator.
|
||||||
|
eat(rdm, '_');
|
||||||
|
}
|
||||||
|
|
||||||
size_t start = rdm->next;
|
size_t start = rdm->next;
|
||||||
rdm->next += len;
|
rdm->next += len;
|
||||||
@ -1157,30 +1162,173 @@ static void demangle_const_str_literal(struct rust_demangler *rdm) {
|
|||||||
PRINT("\"");
|
PRINT("\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether `name` has the shape of a legacy-mangling hash component:
// a leading 'h' followed only by decimal digits or lowercase 'a'..'f'.
//
// Returns false for an identifier with no characters (or no backing pointer),
// so it is safe to call on an identifier that was never filled in.
static bool is_rust_hash(struct rust_mangled_ident name) {
    // Guard before touching name.ascii[0]: an empty/unfilled identifier has
    // nothing to inspect and can never be a hash.
    if (!name.ascii || name.ascii_len == 0) {
        return false;
    }
    if (name.ascii[0] != 'h') {
        return false;
    }
    // Every character after the 'h' must be a lowercase hex digit.
    for (size_t i = 1; i < name.ascii_len; i++) {
        char ch = name.ascii[i];
        bool is_lower_hex = IS_DIGIT(ch) || (ch >= 'a' && ch <= 'f');
        if (!is_lower_hex) {
            return false;
        }
    }
    return true;
}
|
||||||
|
|
||||||
|
static void print_legacy_ident(
|
||||||
|
struct rust_demangler *rdm, struct rust_mangled_ident ident
|
||||||
|
) {
|
||||||
|
if (rdm->errored || rdm->skipping_printing)
|
||||||
|
return;
|
||||||
|
|
||||||
|
CHECK_OR(!ident.punycode, return);
|
||||||
|
|
||||||
|
if (ident.ascii[0] == '_' && ident.ascii[1] == '$') {
|
||||||
|
ident.ascii += 1;
|
||||||
|
ident.ascii_len -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
if (ident.ascii_len == 0) {
|
||||||
|
break;
|
||||||
|
} else if (ident.ascii[0] == '.') {
|
||||||
|
if (ident.ascii_len >= 2 && ident.ascii[1] == '.') {
|
||||||
|
PRINT("::");
|
||||||
|
ident.ascii += 2;
|
||||||
|
ident.ascii_len -= 2;
|
||||||
|
} else {
|
||||||
|
PRINT(".");
|
||||||
|
ident.ascii += 1;
|
||||||
|
ident.ascii_len -= 1;
|
||||||
|
}
|
||||||
|
} else if (ident.ascii[0] == '$') {
|
||||||
|
const char *end_ptr =
|
||||||
|
(const char *)memchr(&ident.ascii[1], '$', ident.ascii_len - 1);
|
||||||
|
if (!end_ptr)
|
||||||
|
break;
|
||||||
|
const char *escape = &ident.ascii[1];
|
||||||
|
size_t escape_len = end_ptr - escape;
|
||||||
|
|
||||||
|
if (strncmp(escape, "SP", 2) == 0) {
|
||||||
|
PRINT("@");
|
||||||
|
} else if (strncmp(escape, "BP", 2) == 0) {
|
||||||
|
PRINT("*");
|
||||||
|
} else if (strncmp(escape, "RF", 2) == 0) {
|
||||||
|
PRINT("&");
|
||||||
|
} else if (strncmp(escape, "LT", 2) == 0) {
|
||||||
|
PRINT("<");
|
||||||
|
} else if (strncmp(escape, "GT", 2) == 0) {
|
||||||
|
PRINT(">");
|
||||||
|
} else if (strncmp(escape, "LP", 2) == 0) {
|
||||||
|
PRINT("(");
|
||||||
|
} else if (strncmp(escape, "RP", 2) == 0) {
|
||||||
|
PRINT(")");
|
||||||
|
} else if (strncmp(escape, "C", 1) == 0) {
|
||||||
|
PRINT(",");
|
||||||
|
} else {
|
||||||
|
if (escape[0] != 'u') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *digits = &escape[1];
|
||||||
|
size_t digits_len = escape_len - 1;
|
||||||
|
|
||||||
|
bool invalid = false;
|
||||||
|
for (size_t i = 1; i < digits_len; i++) {
|
||||||
|
if (!IS_DIGIT(digits[i]) &&
|
||||||
|
!(digits[i] >= 'a' && digits[i] <= 'f')) {
|
||||||
|
invalid = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (invalid)
|
||||||
|
break;
|
||||||
|
|
||||||
|
struct hex_nibbles hex;
|
||||||
|
|
||||||
|
hex.nibbles = digits;
|
||||||
|
hex.nibbles_len = digits_len;
|
||||||
|
|
||||||
|
uint32_t c = 0;
|
||||||
|
for (size_t i = 0; i < hex.nibbles_len; i++)
|
||||||
|
c = (c << 4) | decode_hex_nibble(hex.nibbles[i]);
|
||||||
|
|
||||||
|
if (!(c < 0xd800 || (c > 0xdfff && c < 0x10ffff))) {
|
||||||
|
break; // Not a valid unicode scalar
|
||||||
|
}
|
||||||
|
|
||||||
|
if (c >= 0x20 && c <= 0x7e) {
|
||||||
|
// Printable ASCII
|
||||||
|
char v = (char)c;
|
||||||
|
print_str(rdm, &v, 1);
|
||||||
|
} else {
|
||||||
|
// FIXME show printable unicode characters without hex
|
||||||
|
// encoding
|
||||||
|
PRINT("\\u{");
|
||||||
|
char s[9] = {0};
|
||||||
|
sprintf(s, "%" PRIx32, c);
|
||||||
|
PRINT(s);
|
||||||
|
PRINT("}");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ident.ascii += escape_len + 2;
|
||||||
|
ident.ascii_len -= escape_len + 2;
|
||||||
|
} else {
|
||||||
|
bool found = false;
|
||||||
|
for (size_t i = 0; i < ident.ascii_len; i++) {
|
||||||
|
if (ident.ascii[i] == '$' || ident.ascii[i] == '.') {
|
||||||
|
print_str(rdm, ident.ascii, i);
|
||||||
|
ident.ascii += i;
|
||||||
|
ident.ascii_len -= i;
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
print_str(rdm, ident.ascii, ident.ascii_len);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void demangle_legacy_path(struct rust_demangler *rdm) {
|
||||||
|
bool first = true;
|
||||||
|
|
||||||
|
while (1) {
|
||||||
|
if (eat(rdm, 'E')) {
|
||||||
|
// FIXME Maybe check if at end of symbol?
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct rust_mangled_ident name = parse_ident(rdm);
|
||||||
|
|
||||||
|
if (!rdm->verbose && peek(rdm) == 'E' && is_rust_hash(name)) {
|
||||||
|
// Skip printing the hash if verbose mode is disabled.
|
||||||
|
eat(rdm, 'E');
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!first) {
|
||||||
|
PRINT("::");
|
||||||
|
}
|
||||||
|
first = false;
|
||||||
|
|
||||||
|
print_legacy_ident(rdm, name);
|
||||||
|
|
||||||
|
CHECK_OR(!rdm->errored, return);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool sysprof_rust_demangle_with_callback(
|
bool sysprof_rust_demangle_with_callback(
|
||||||
const char *mangled, int flags,
|
const char *whole_mangled_symbol, int flags,
|
||||||
void (*callback)(const char *data, size_t len, void *opaque), void *opaque
|
void (*callback)(const char *data, size_t len, void *opaque), void *opaque
|
||||||
) {
|
) {
|
||||||
// Rust symbols always start with R, _R or __R.
|
|
||||||
if (mangled[0] == '_' && mangled[1] == 'R')
|
|
||||||
mangled += 2;
|
|
||||||
else if (mangled[0] == 'R')
|
|
||||||
// On Windows, dbghelp strips leading underscores, so we accept "R..."
|
|
||||||
// form too.
|
|
||||||
mangled += 1;
|
|
||||||
else if (mangled[0] == '_' && mangled[1] == '_' && mangled[2] == 'R')
|
|
||||||
// On OSX, symbols are prefixed with an extra _
|
|
||||||
mangled += 3;
|
|
||||||
else
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Paths always start with uppercase characters.
|
|
||||||
if (!IS_UPPER(mangled[0]))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
struct rust_demangler rdm;
|
struct rust_demangler rdm;
|
||||||
|
|
||||||
rdm.sym = mangled;
|
rdm.sym = whole_mangled_symbol;
|
||||||
rdm.sym_len = 0;
|
rdm.sym_len = 0;
|
||||||
|
|
||||||
rdm.callback_opaque = opaque;
|
rdm.callback_opaque = opaque;
|
||||||
@ -1190,11 +1338,47 @@ bool sysprof_rust_demangle_with_callback(
|
|||||||
rdm.errored = false;
|
rdm.errored = false;
|
||||||
rdm.skipping_printing = false;
|
rdm.skipping_printing = false;
|
||||||
rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0;
|
rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0;
|
||||||
rdm.version = 0;
|
rdm.version = -2; // Invalid version
|
||||||
rdm.bound_lifetime_depth = 0;
|
rdm.bound_lifetime_depth = 0;
|
||||||
|
|
||||||
|
// Rust symbols always start with R, _R or __R for the v0 scheme or ZN, _ZN
|
||||||
|
// or __ZN for the legacy scheme.
|
||||||
|
if (strncmp(rdm.sym, "_R", 2) == 0) {
|
||||||
|
rdm.sym += 2;
|
||||||
|
rdm.version = 0; // v0
|
||||||
|
} else if (rdm.sym[0] == 'R') {
|
||||||
|
// On Windows, dbghelp strips leading underscores, so we accept "R..."
|
||||||
|
// form too.
|
||||||
|
rdm.sym += 1;
|
||||||
|
rdm.version = 0; // v0
|
||||||
|
} else if (strncmp(rdm.sym, "__R", 3) == 0) {
|
||||||
|
// On OSX, symbols are prefixed with an extra _
|
||||||
|
rdm.sym += 3;
|
||||||
|
rdm.version = 0; // v0
|
||||||
|
} else if (strncmp(rdm.sym, "_ZN", 3) == 0) {
|
||||||
|
rdm.sym += 3;
|
||||||
|
rdm.version = -1; // legacy
|
||||||
|
} else if (strncmp(rdm.sym, "ZN", 2) == 0) {
|
||||||
|
// On Windows, dbghelp strips leading underscores, so we accept "ZN..."
|
||||||
|
// form too.
|
||||||
|
rdm.sym += 2;
|
||||||
|
rdm.version = -1; // legacy
|
||||||
|
} else if (strncmp(rdm.sym, "__ZN", 4) == 0) {
|
||||||
|
// On OSX, symbols are prefixed with an extra _
|
||||||
|
rdm.sym += 4;
|
||||||
|
rdm.version = -1; // legacy
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rdm.version != -1) {
|
||||||
|
// Paths always start with uppercase characters.
|
||||||
|
if (!IS_UPPER(rdm.sym[0]))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
// Rust symbols only use ASCII characters.
|
// Rust symbols only use ASCII characters.
|
||||||
for (const char *p = mangled; *p; p++) {
|
for (const char *p = rdm.sym; *p; p++) {
|
||||||
if ((*p & 0x80) != 0)
|
if ((*p & 0x80) != 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
@ -1206,17 +1390,32 @@ bool sysprof_rust_demangle_with_callback(
|
|||||||
rdm.sym_len++;
|
rdm.sym_len++;
|
||||||
}
|
}
|
||||||
|
|
||||||
demangle_path(&rdm, true);
|
if (rdm.version == -1) {
|
||||||
|
demangle_legacy_path(&rdm);
|
||||||
|
} else {
|
||||||
|
demangle_path(&rdm, true);
|
||||||
|
|
||||||
// Skip instantiating crate.
|
// Skip instantiating crate.
|
||||||
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
|
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
|
||||||
peek(&rdm) <= 'Z') {
|
peek(&rdm) <= 'Z') {
|
||||||
rdm.skipping_printing = true;
|
rdm.skipping_printing = true;
|
||||||
demangle_path(&rdm, false);
|
demangle_path(&rdm, false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print trailing garbage
|
if (!rdm.errored && (rdm.sym_len - rdm.next > 0)) {
|
||||||
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
|
for (const char *p = rdm.sym + rdm.next; *p; p++) {
|
||||||
|
// FIXME match is_symbol_like from rustc-demangle
|
||||||
|
if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') ||
|
||||||
|
(*p >= '0' && *p <= '9') || *p == '.')) {
|
||||||
|
// Suffix is not a symbol like string
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Print LLVM produced suffix
|
||||||
|
print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next);
|
||||||
|
}
|
||||||
|
|
||||||
return !rdm.errored;
|
return !rdm.errored;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
|
Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3
|
||||||
Modifications from upstream:
|
Modifications from upstream:
|
||||||
* Add sysprof_ prefix to exported symbols and mark them as hidden
|
* Add sysprof_ prefix to exported symbols and mark them as hidden
|
||||||
* Add pragma once
|
* Add pragma once
|
||||||
|
|||||||
Reference in New Issue
Block a user