/* Imported from https://github.com/LykenSol/rust-demangle.c commit 4283d46e4064a7e1c54bc9918a07b066cb43fca3 Modifications from upstream: * Add sysprof_ prefix to exported symbols * Use g_strdup in sysprof_rust_demangle */ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeclaration-after-statement" // FIXME(eddyb) should this use ``? #include "rust-demangle.h" #include #include #include #include struct rust_demangler { const char *sym; size_t sym_len; void *callback_opaque; void (*callback)(const char *data, size_t len, void *opaque); // Position of the next character to read from the symbol. size_t next; // `true` if any error occurred. bool errored; // `true` if nothing should be printed. bool skipping_printing; // `true` if printing should be verbose (e.g. include hashes). bool verbose; // Rust mangling version, with legacy mangling being -1. int version; uint64_t bound_lifetime_depth; }; #define ERROR_AND(x) \ do { \ rdm->errored = true; \ x; \ } while (0) #define CHECK_OR(cond, x) \ do { \ if (!(cond)) \ ERROR_AND(x); \ } while (0) // FIXME(eddyb) consider renaming these to not start with `IS` (UB?). #define IS_DIGIT(c) ((c) >= '0' && (c) <= '9') #define IS_UPPER(c) ((c) >= 'A' && (c) <= 'Z') #define IS_LOWER(c) ((c) >= 'a' && (c) <= 'z') // Parsing functions. static char peek(const struct rust_demangler *rdm) { if (rdm->next < rdm->sym_len) return rdm->sym[rdm->next]; return 0; } static bool eat(struct rust_demangler *rdm, char c) { if (peek(rdm) == c) { rdm->next++; return true; } else return false; } static char next(struct rust_demangler *rdm) { char c = peek(rdm); CHECK_OR(c, return 0); rdm->next++; return c; } struct hex_nibbles { const char *nibbles; size_t nibbles_len; }; static struct hex_nibbles parse_hex_nibbles(struct rust_demangler *rdm) { struct hex_nibbles hex; hex.nibbles = NULL; hex.nibbles_len = 0; size_t start = rdm->next, hex_len = 0; while (!eat(rdm, '_')) { char c = next(rdm); CHECK_OR(IS_DIGIT(c) || (c >= 'a' && c <= 'f'), return hex); hex_len++; } hex.nibbles = rdm->sym + start; hex.nibbles_len = hex_len; return hex; } static struct hex_nibbles parse_hex_nibbles_for_const_uint(struct rust_demangler *rdm) { struct hex_nibbles hex = parse_hex_nibbles(rdm); CHECK_OR(!rdm->errored, return hex); // Trim leading `0`s. while (hex.nibbles_len > 0 && *hex.nibbles == '0') { hex.nibbles++; hex.nibbles_len--; } return hex; } static struct hex_nibbles parse_hex_nibbles_for_const_bytes(struct rust_demangler *rdm) { struct hex_nibbles hex = parse_hex_nibbles(rdm); CHECK_OR(!rdm->errored && (hex.nibbles_len % 2 == 0), return hex); return hex; } static uint8_t decode_hex_nibble(char nibble) { return nibble >= 'a' ? 10 + (nibble - 'a') : nibble - '0'; } static uint64_t parse_integer_62(struct rust_demangler *rdm) { if (eat(rdm, '_')) return 0; uint64_t x = 0; while (!eat(rdm, '_')) { char c = next(rdm); x *= 62; if (IS_DIGIT(c)) x += c - '0'; else if (IS_LOWER(c)) x += 10 + (c - 'a'); else if (IS_UPPER(c)) x += 10 + 26 + (c - 'A'); else ERROR_AND(return 0); } return x + 1; } static uint64_t parse_opt_integer_62(struct rust_demangler *rdm, char tag) { if (!eat(rdm, tag)) return 0; return 1 + parse_integer_62(rdm); } static uint64_t parse_disambiguator(struct rust_demangler *rdm) { return parse_opt_integer_62(rdm, 's'); } struct rust_mangled_ident { // ASCII part of the identifier. const char *ascii; size_t ascii_len; // Punycode insertion codes for Unicode codepoints, if any. const char *punycode; size_t punycode_len; }; static struct rust_mangled_ident parse_ident(struct rust_demangler *rdm) { struct rust_mangled_ident ident; ident.ascii = NULL; ident.ascii_len = 0; ident.punycode = NULL; ident.punycode_len = 0; bool is_punycode = false; if (rdm->version != -1) { is_punycode = eat(rdm, 'u'); } char c = next(rdm); CHECK_OR(IS_DIGIT(c), return ident); size_t len = c - '0'; if (c != '0') while (IS_DIGIT(peek(rdm))) len = len * 10 + (next(rdm) - '0'); if (rdm->version != -1) { // Skip past the optional `_` separator. eat(rdm, '_'); } size_t start = rdm->next; rdm->next += len; // Check for overflows. CHECK_OR((start <= rdm->next) && (rdm->next <= rdm->sym_len), return ident); ident.ascii = rdm->sym + start; ident.ascii_len = len; if (is_punycode) { ident.punycode_len = 0; while (ident.ascii_len > 0) { ident.ascii_len--; // The last '_' is a separator between ascii & punycode. if (ident.ascii[ident.ascii_len] == '_') break; ident.punycode_len++; } CHECK_OR(ident.punycode_len > 0, return ident); ident.punycode = ident.ascii + (len - ident.punycode_len); } if (ident.ascii_len == 0) ident.ascii = NULL; return ident; } // Printing functions. static void print_str(struct rust_demangler *rdm, const char *data, size_t len) { if (!rdm->errored && !rdm->skipping_printing) rdm->callback(data, len, rdm->callback_opaque); } #define PRINT(s) print_str(rdm, s, strlen(s)) static void print_uint64(struct rust_demangler *rdm, uint64_t x) { char s[21]; sprintf(s, "%" PRIu64, x); PRINT(s); } static void print_uint64_hex(struct rust_demangler *rdm, uint64_t x) { char s[17]; sprintf(s, "%" PRIx64, x); PRINT(s); } static void print_quoted_escaped_char(struct rust_demangler *rdm, char quote, uint32_t c) { CHECK_OR(c < 0xd800 || (c > 0xdfff && c < 0x10ffff), return); switch (c) { case '\0': PRINT("\\0"); break; case '\t': PRINT("\\t"); break; case '\r': PRINT("\\r"); break; case '\n': PRINT("\\n"); break; case '\\': PRINT("\\\\"); break; case '"': if (quote == '"') { PRINT("\\\""); } else { PRINT("\""); } break; case '\'': if (quote == '\'') { PRINT("\\'"); } else { PRINT("'"); } break; default: if (c >= 0x20 && c <= 0x7e) { // Printable ASCII char v = (char)c; print_str(rdm, &v, 1); } else { // FIXME show printable unicode characters without hex encoding PRINT("\\u{"); char s[9] = {0}; sprintf(s, "%" PRIx32, c); PRINT(s); PRINT("}"); } } } static void print_ident(struct rust_demangler *rdm, struct rust_mangled_ident ident) { if (rdm->errored || rdm->skipping_printing) return; if (!ident.punycode) { print_str(rdm, ident.ascii, ident.ascii_len); return; } size_t len = 0; size_t cap = 4; while (cap < ident.ascii_len) { cap *= 2; // Check for overflows. CHECK_OR((cap * 4) / 4 == cap, return); } // Store the output codepoints as groups of 4 UTF-8 bytes. uint8_t *out = (uint8_t *)malloc(cap * 4); CHECK_OR(out, return); // Populate initial output from ASCII fragment. for (len = 0; len < ident.ascii_len; len++) { uint8_t *p = out + 4 * len; p[0] = 0; p[1] = 0; p[2] = 0; p[3] = ident.ascii[len]; } // Punycode parameters and initial state. size_t base = 36; size_t t_min = 1; size_t t_max = 26; size_t skew = 38; size_t damp = 700; size_t bias = 72; size_t i = 0; uint32_t c = 0x80; size_t punycode_pos = 0; while (punycode_pos < ident.punycode_len) { // Read one delta value. size_t delta = 0; size_t w = 1; size_t k = 0; size_t t; uint8_t d; do { k += base; t = k < bias ? 0 : (k - bias); if (t < t_min) t = t_min; if (t > t_max) t = t_max; CHECK_OR(punycode_pos < ident.punycode_len, goto cleanup); d = ident.punycode[punycode_pos++]; if (IS_LOWER(d)) d = d - 'a'; else if (IS_DIGIT(d)) d = 26 + (d - '0'); else ERROR_AND(goto cleanup); delta += d * w; w *= base - t; } while (d >= t); // Compute the new insert position and character. len++; i += delta; c += i / len; i %= len; // Ensure enough space is available. if (cap < len) { cap *= 2; // Check for overflows. CHECK_OR((cap * 4) / 4 == cap, goto cleanup); CHECK_OR(cap >= len, goto cleanup); } uint8_t *p = (uint8_t *)realloc(out, cap * 4); CHECK_OR(p, goto cleanup); out = p; // Move the characters after the insert position. p = out + i * 4; memmove(p + 4, p, (len - i - 1) * 4); // Insert the new character, as UTF-8 bytes. p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0; p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0; p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f); p[3] = 0x80 | (c & 0x3f); // If there are no more deltas, decoding is complete. if (punycode_pos == ident.punycode_len) break; i++; // Perform bias adaptation. delta /= damp; damp = 2; delta += delta / len; k = 0; while (delta > ((base - t_min) * t_max) / 2) { delta /= base - t_min; k += base; } bias = k + ((base - t_min + 1) * delta) / (delta + skew); } // Remove all the 0 bytes to leave behind an UTF-8 string. size_t j; for (i = 0, j = 0; i < len * 4; i++) if (out[i] != 0) out[j++] = out[i]; print_str(rdm, (const char *)out, j); cleanup: free(out); } /// Print the lifetime according to the previously decoded index. /// An index of `0` always refers to `'_`, but starting with `1`, /// indices refer to late-bound lifetimes introduced by a binder. static void print_lifetime_from_index(struct rust_demangler *rdm, uint64_t lt) { PRINT("'"); if (lt == 0) { PRINT("_"); return; } uint64_t depth = rdm->bound_lifetime_depth - lt; // Try to print lifetimes alphabetically first. if (depth < 26) { char c = 'a' + depth; print_str(rdm, &c, 1); } else { // Use `'_123` after running out of letters. PRINT("_"); print_uint64(rdm, depth); } } // Demangling functions. static void demangle_binder(struct rust_demangler *rdm); static void demangle_path(struct rust_demangler *rdm, bool in_value); static void demangle_generic_arg(struct rust_demangler *rdm); static void demangle_type(struct rust_demangler *rdm); static bool demangle_path_maybe_open_generics(struct rust_demangler *rdm); static void demangle_dyn_trait(struct rust_demangler *rdm); static void demangle_const(struct rust_demangler *rdm, bool in_value); static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag); static void demangle_const_str_literal(struct rust_demangler *rdm); /// Optionally enter a binder ('G') for late-bound lifetimes, /// printing e.g. `for<'a, 'b> `, and make those lifetimes visible /// to the caller (via depth level, which the caller should reset). static void demangle_binder(struct rust_demangler *rdm) { CHECK_OR(!rdm->errored, return); uint64_t bound_lifetimes = parse_opt_integer_62(rdm, 'G'); if (bound_lifetimes > 0) { PRINT("for<"); for (uint64_t i = 0; i < bound_lifetimes; i++) { if (i > 0) PRINT(", "); rdm->bound_lifetime_depth++; print_lifetime_from_index(rdm, 1); } PRINT("> "); } } static void demangle_path(struct rust_demangler *rdm, bool in_value) { CHECK_OR(!rdm->errored, return); char tag = next(rdm); switch (tag) { case 'C': { uint64_t dis = parse_disambiguator(rdm); struct rust_mangled_ident name = parse_ident(rdm); print_ident(rdm, name); if (rdm->verbose) { PRINT("["); print_uint64_hex(rdm, dis); PRINT("]"); } break; } case 'N': { char ns = next(rdm); CHECK_OR(IS_LOWER(ns) || IS_UPPER(ns), return); demangle_path(rdm, in_value); uint64_t dis = parse_disambiguator(rdm); struct rust_mangled_ident name = parse_ident(rdm); if (IS_UPPER(ns)) { // Special namespaces, like closures and shims. PRINT("::{"); switch (ns) { case 'C': PRINT("closure"); break; case 'S': PRINT("shim"); break; default: print_str(rdm, &ns, 1); } if (name.ascii || name.punycode) { PRINT(":"); print_ident(rdm, name); } PRINT("#"); print_uint64(rdm, dis); PRINT("}"); } else { // Implementation-specific/unspecified namespaces. if (name.ascii || name.punycode) { PRINT("::"); print_ident(rdm, name); } } break; } case 'M': case 'X': // Ignore the `impl`'s own path. parse_disambiguator(rdm); bool was_skipping_printing = rdm->skipping_printing; rdm->skipping_printing = true; demangle_path(rdm, in_value); rdm->skipping_printing = was_skipping_printing; __attribute__((fallthrough)); case 'Y': PRINT("<"); demangle_type(rdm); if (tag != 'M') { PRINT(" as "); demangle_path(rdm, false); } PRINT(">"); break; case 'I': demangle_path(rdm, in_value); if (in_value) PRINT("::"); PRINT("<"); for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) { if (i > 0) PRINT(", "); demangle_generic_arg(rdm); } PRINT(">"); break; case 'B': { size_t backref = parse_integer_62(rdm); if (!rdm->skipping_printing) { size_t old_next = rdm->next; rdm->next = backref; demangle_path(rdm, in_value); rdm->next = old_next; } break; } default: ERROR_AND(return); } } static void demangle_generic_arg(struct rust_demangler *rdm) { if (eat(rdm, 'L')) { uint64_t lt = parse_integer_62(rdm); print_lifetime_from_index(rdm, lt); } else if (eat(rdm, 'K')) demangle_const(rdm, false); else demangle_type(rdm); } static const char *basic_type(char tag) { switch (tag) { case 'b': return "bool"; case 'c': return "char"; case 'e': return "str"; case 'u': return "()"; case 'a': return "i8"; case 's': return "i16"; case 'l': return "i32"; case 'x': return "i64"; case 'n': return "i128"; case 'i': return "isize"; case 'h': return "u8"; case 't': return "u16"; case 'm': return "u32"; case 'y': return "u64"; case 'o': return "u128"; case 'j': return "usize"; case 'f': return "f32"; case 'd': return "f64"; case 'z': return "!"; case 'p': return "_"; case 'v': return "..."; default: return NULL; } } static void demangle_type(struct rust_demangler *rdm) { CHECK_OR(!rdm->errored, return); char tag = next(rdm); const char *basic = basic_type(tag); if (basic) { PRINT(basic); return; } switch (tag) { case 'R': case 'Q': PRINT("&"); if (eat(rdm, 'L')) { uint64_t lt = parse_integer_62(rdm); if (lt) { print_lifetime_from_index(rdm, lt); PRINT(" "); } } if (tag != 'R') PRINT("mut "); demangle_type(rdm); break; case 'P': case 'O': PRINT("*"); if (tag != 'P') PRINT("mut "); else PRINT("const "); demangle_type(rdm); break; case 'A': case 'S': PRINT("["); demangle_type(rdm); if (tag == 'A') { PRINT("; "); demangle_const(rdm, true); } PRINT("]"); break; case 'T': { PRINT("("); size_t i; for (i = 0; !rdm->errored && !eat(rdm, 'E'); i++) { if (i > 0) PRINT(", "); demangle_type(rdm); } if (i == 1) PRINT(","); PRINT(")"); break; } case 'F': { uint64_t old_bound_lifetime_depth = rdm->bound_lifetime_depth; demangle_binder(rdm); if (eat(rdm, 'U')) PRINT("unsafe "); if (eat(rdm, 'K')) { struct rust_mangled_ident abi; if (eat(rdm, 'C')) { abi.ascii = "C"; abi.ascii_len = 1; } else { abi = parse_ident(rdm); CHECK_OR(abi.ascii && !abi.punycode, goto restore); } PRINT("extern \""); // If the ABI had any `-`, they were replaced with `_`, // so the parts between `_` have to be re-joined with `-`. for (size_t i = 0; i < abi.ascii_len; i++) { if (abi.ascii[i] == '_') { print_str(rdm, abi.ascii, i); PRINT("-"); abi.ascii += i + 1; abi.ascii_len -= i + 1; i = 0; } } print_str(rdm, abi.ascii, abi.ascii_len); PRINT("\" "); } PRINT("fn("); for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) { if (i > 0) PRINT(", "); demangle_type(rdm); } PRINT(")"); if (eat(rdm, 'u')) { // Skip printing the return type if it's 'u', i.e. `()`. } else { PRINT(" -> "); demangle_type(rdm); } // Restore `bound_lifetime_depth` to outside the binder. restore: rdm->bound_lifetime_depth = old_bound_lifetime_depth; break; } case 'D': PRINT("dyn "); uint64_t old_bound_lifetime_depth = rdm->bound_lifetime_depth; demangle_binder(rdm); for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) { if (i > 0) PRINT(" + "); demangle_dyn_trait(rdm); } // Restore `bound_lifetime_depth` to outside the binder. rdm->bound_lifetime_depth = old_bound_lifetime_depth; CHECK_OR(eat(rdm, 'L'), return); uint64_t lt = parse_integer_62(rdm); if (lt) { PRINT(" + "); print_lifetime_from_index(rdm, lt); } break; case 'B': { size_t backref = parse_integer_62(rdm); if (!rdm->skipping_printing) { size_t old_next = rdm->next; rdm->next = backref; demangle_type(rdm); rdm->next = old_next; } break; } default: // Go back to the tag, so `demangle_path` also sees it. rdm->next--; demangle_path(rdm, false); } } /// A trait in a trait object may have some "existential projections" /// (i.e. associated type bindings) after it, which should be printed /// in the `<...>` of the trait, e.g. `dyn Trait`. /// To this end, this method will keep the `<...>` of an 'I' path /// open, by omitting the `>`, and return `Ok(true)` in that case. static bool demangle_path_maybe_open_generics(struct rust_demangler *rdm) { bool open = false; CHECK_OR(!rdm->errored, return open); if (eat(rdm, 'B')) { size_t backref = parse_integer_62(rdm); if (!rdm->skipping_printing) { size_t old_next = rdm->next; rdm->next = backref; open = demangle_path_maybe_open_generics(rdm); rdm->next = old_next; } } else if (eat(rdm, 'I')) { demangle_path(rdm, false); PRINT("<"); open = true; for (size_t i = 0; !rdm->errored && !eat(rdm, 'E'); i++) { if (i > 0) PRINT(", "); demangle_generic_arg(rdm); } } else demangle_path(rdm, false); return open; } static void demangle_dyn_trait(struct rust_demangler *rdm) { CHECK_OR(!rdm->errored, return); bool open = demangle_path_maybe_open_generics(rdm); while (eat(rdm, 'p')) { if (!open) PRINT("<"); else PRINT(", "); open = true; struct rust_mangled_ident name = parse_ident(rdm); print_ident(rdm, name); PRINT(" = "); demangle_type(rdm); } if (open) PRINT(">"); } static void demangle_const(struct rust_demangler *rdm, bool in_value) { CHECK_OR(!rdm->errored, return); bool opened_brace = false; char ty_tag = next(rdm); switch (ty_tag) { case 'p': PRINT("_"); break; // Unsigned integer types. case 'h': case 't': case 'm': case 'y': case 'o': case 'j': demangle_const_uint(rdm, ty_tag); break; case 'a': case 's': case 'l': case 'x': case 'n': case 'i': if (eat(rdm, 'n')) { PRINT("-"); } demangle_const_uint(rdm, ty_tag); break; case 'b': { struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm); CHECK_OR(!rdm->errored && hex.nibbles_len <= 1, return); uint8_t v = hex.nibbles_len > 0 ? decode_hex_nibble(hex.nibbles[0]) : 0; CHECK_OR(v <= 1, return); PRINT(v == 1 ? "true" : "false"); break; } case 'c': { struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm); CHECK_OR(!rdm->errored && hex.nibbles_len <= 6, return); uint32_t c = 0; for (size_t i = 0; i < hex.nibbles_len; i++) c = (c << 4) | decode_hex_nibble(hex.nibbles[i]); PRINT("'"); print_quoted_escaped_char(rdm, '\'', c); PRINT("'"); break; } case 'e': // NOTE(eddyb) a string literal `"..."` has type `&str`, so // to get back the type `str`, `*"..."` syntax is needed // (even if that may not be valid in Rust itself). if (!in_value) { opened_brace = true; PRINT("{"); } PRINT("*"); demangle_const_str_literal(rdm); break; case 'R': case 'Q': if (ty_tag == 'R' && eat(rdm, 'e')) { // NOTE(eddyb) this prints `"..."` instead of `&*"..."`, which // is what `Re..._` would imply (see comment for `str` above). demangle_const_str_literal(rdm); break; } if (!in_value) { opened_brace = true; PRINT("{"); } PRINT("&"); if (ty_tag != 'R') { PRINT("mut "); } demangle_const(rdm, true); break; case 'A': { if (!in_value) { opened_brace = true; PRINT("{"); } PRINT("["); size_t i = 0; while (!eat(rdm, 'E')) { CHECK_OR(!rdm->errored, return); if (i > 0) PRINT(", "); demangle_const(rdm, true); i += 1; } PRINT("]"); break; } case 'T': { if (!in_value) { opened_brace = true; PRINT("{"); } PRINT("("); size_t i = 0; while (!eat(rdm, 'E')) { CHECK_OR(!rdm->errored, return); if (i > 0) PRINT(", "); demangle_const(rdm, true); i += 1; } if (i == 1) PRINT(","); PRINT(")"); break; } case 'V': if (!in_value) { opened_brace = true; PRINT("{"); } demangle_path(rdm, true); switch (next(rdm)) { case 'U': break; case 'T': { PRINT("("); size_t i = 0; while (!eat(rdm, 'E')) { CHECK_OR(!rdm->errored, return); if (i > 0) PRINT(", "); demangle_const(rdm, true); i += 1; } PRINT(")"); break; } case 'S': { PRINT(" { "); size_t i = 0; while (!eat(rdm, 'E')) { CHECK_OR(!rdm->errored, return); if (i > 0) PRINT(", "); parse_disambiguator(rdm); struct rust_mangled_ident name = parse_ident(rdm); print_ident(rdm, name); PRINT(": "); demangle_const(rdm, true); i += 1; } PRINT(" }"); break; } default: ERROR_AND(return); } break; case 'B': { size_t backref = parse_integer_62(rdm); if (!rdm->skipping_printing) { size_t old_next = rdm->next; rdm->next = backref; demangle_const(rdm, in_value); rdm->next = old_next; } break; } default: ERROR_AND(return); } if (opened_brace) { PRINT("}"); } } static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag) { CHECK_OR(!rdm->errored, return); struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm); CHECK_OR(!rdm->errored, return); // Print anything that doesn't fit in `uint64_t` verbatim. if (hex.nibbles_len > 16) { PRINT("0x"); print_str(rdm, hex.nibbles, hex.nibbles_len); } else { uint64_t v = 0; for (size_t i = 0; i < hex.nibbles_len; i++) v = (v << 4) | decode_hex_nibble(hex.nibbles[i]); print_uint64(rdm, v); } if (rdm->verbose) PRINT(basic_type(ty_tag)); } // UTF-8 uses an unary encoding for its "length" field (`1`s followed by a `0`). struct utf8_byte { // Decoded "length" field of an UTF-8 byte, including the special cases: // - `0` indicates this is a lone ASCII byte // - `1` indicates a continuation byte (cannot start an UTF-8 sequence) size_t seq_len; // Remaining (`payload_width`) bits in the UTF-8 byte, contributing to // the Unicode scalar value being encoded in the UTF-8 sequence. uint8_t payload; size_t payload_width; }; static struct utf8_byte utf8_decode(uint8_t byte) { struct utf8_byte utf8; utf8.seq_len = 0; utf8.payload = byte; utf8.payload_width = 8; // FIXME(eddyb) figure out if using "count leading ones/zeros" is an option. while (utf8.seq_len <= 6) { uint8_t msb = 0x80 >> utf8.seq_len; utf8.payload &= ~msb; utf8.payload_width--; if ((byte & msb) == 0) break; utf8.seq_len++; } return utf8; } static void demangle_const_str_literal(struct rust_demangler *rdm) { CHECK_OR(!rdm->errored, return); struct hex_nibbles hex = parse_hex_nibbles_for_const_bytes(rdm); CHECK_OR(!rdm->errored, return); PRINT("\""); for (size_t i = 0; i < hex.nibbles_len; i += 2) { struct utf8_byte utf8 = utf8_decode( (decode_hex_nibble(hex.nibbles[i]) << 4) | decode_hex_nibble(hex.nibbles[i + 1]) ); uint32_t c = utf8.payload; if (utf8.seq_len > 0) { CHECK_OR(utf8.seq_len >= 2 && utf8.seq_len <= 4, return); for (size_t extra = utf8.seq_len - 1; extra > 0; extra--) { i += 2; utf8 = utf8_decode( (decode_hex_nibble(hex.nibbles[i]) << 4) | decode_hex_nibble(hex.nibbles[i + 1]) ); CHECK_OR(utf8.seq_len == 1, return); c = (c << utf8.payload_width) | utf8.payload; } } print_quoted_escaped_char(rdm, '"', c); } PRINT("\""); } static bool is_rust_hash(struct rust_mangled_ident name) { if (name.ascii[0] != 'h') { return false; } for (size_t i = 1; i < name.ascii_len; i++) { if (!IS_DIGIT(name.ascii[i]) && !(name.ascii[i] >= 'a' && name.ascii[i] <= 'f')) { return false; } } return true; } static void print_legacy_ident( struct rust_demangler *rdm, struct rust_mangled_ident ident ) { if (rdm->errored || rdm->skipping_printing) return; CHECK_OR(!ident.punycode, return); if (ident.ascii[0] == '_' && ident.ascii[1] == '$') { ident.ascii += 1; ident.ascii_len -= 1; } while (1) { if (ident.ascii_len == 0) { break; } else if (ident.ascii[0] == '.') { if (ident.ascii_len >= 2 && ident.ascii[1] == '.') { PRINT("::"); ident.ascii += 2; ident.ascii_len -= 2; } else { PRINT("."); ident.ascii += 1; ident.ascii_len -= 1; } } else if (ident.ascii[0] == '$') { const char *end_ptr = (const char *)memchr(&ident.ascii[1], '$', ident.ascii_len - 1); if (!end_ptr) break; const char *escape = &ident.ascii[1]; size_t escape_len = end_ptr - escape; if (strncmp(escape, "SP", 2) == 0) { PRINT("@"); } else if (strncmp(escape, "BP", 2) == 0) { PRINT("*"); } else if (strncmp(escape, "RF", 2) == 0) { PRINT("&"); } else if (strncmp(escape, "LT", 2) == 0) { PRINT("<"); } else if (strncmp(escape, "GT", 2) == 0) { PRINT(">"); } else if (strncmp(escape, "LP", 2) == 0) { PRINT("("); } else if (strncmp(escape, "RP", 2) == 0) { PRINT(")"); } else if (strncmp(escape, "C", 1) == 0) { PRINT(","); } else { if (escape[0] != 'u') { break; } const char *digits = &escape[1]; size_t digits_len = escape_len - 1; bool invalid = false; for (size_t i = 1; i < digits_len; i++) { if (!IS_DIGIT(digits[i]) && !(digits[i] >= 'a' && digits[i] <= 'f')) { invalid = true; break; } } if (invalid) break; struct hex_nibbles hex; hex.nibbles = digits; hex.nibbles_len = digits_len; uint32_t c = 0; for (size_t i = 0; i < hex.nibbles_len; i++) c = (c << 4) | decode_hex_nibble(hex.nibbles[i]); if (!(c < 0xd800 || (c > 0xdfff && c < 0x10ffff))) { break; // Not a valid unicode scalar } if (c >= 0x20 && c <= 0x7e) { // Printable ASCII char v = (char)c; print_str(rdm, &v, 1); } else { // FIXME show printable unicode characters without hex // encoding PRINT("\\u{"); char s[9] = {0}; sprintf(s, "%" PRIx32, c); PRINT(s); PRINT("}"); } } ident.ascii += escape_len + 2; ident.ascii_len -= escape_len + 2; } else { bool found = false; for (size_t i = 0; i < ident.ascii_len; i++) { if (ident.ascii[i] == '$' || ident.ascii[i] == '.') { print_str(rdm, ident.ascii, i); ident.ascii += i; ident.ascii_len -= i; found = true; break; } } if (!found) { break; } } } print_str(rdm, ident.ascii, ident.ascii_len); } static void demangle_legacy_path(struct rust_demangler *rdm) { bool first = true; while (1) { if (eat(rdm, 'E')) { // FIXME Maybe check if at end of symbol? return; } struct rust_mangled_ident name = parse_ident(rdm); if (!rdm->verbose && peek(rdm) == 'E' && is_rust_hash(name)) { // Skip printing the hash if verbose mode is disabled. eat(rdm, 'E'); break; } if (!first) { PRINT("::"); } first = false; print_legacy_ident(rdm, name); CHECK_OR(!rdm->errored, return); } } bool sysprof_rust_demangle_with_callback( const char *whole_mangled_symbol, int flags, void (*callback)(const char *data, size_t len, void *opaque), void *opaque ) { struct rust_demangler rdm; rdm.sym = whole_mangled_symbol; rdm.sym_len = 0; rdm.callback_opaque = opaque; rdm.callback = callback; rdm.next = 0; rdm.errored = false; rdm.skipping_printing = false; rdm.verbose = (flags & RUST_DEMANGLE_FLAG_VERBOSE) != 0; rdm.version = -2; // Invalid version rdm.bound_lifetime_depth = 0; // Rust symbols always start with R, _R or __R for the v0 scheme or ZN, _ZN // or __ZN for the legacy scheme. if (strncmp(rdm.sym, "_R", 2) == 0) { rdm.sym += 2; rdm.version = 0; // v0 } else if (rdm.sym[0] == 'R') { // On Windows, dbghelp strips leading underscores, so we accept "R..." // form too. rdm.sym += 1; rdm.version = 0; // v0 } else if (strncmp(rdm.sym, "__R", 3) == 0) { // On OSX, symbols are prefixed with an extra _ rdm.sym += 3; rdm.version = 0; // v0 } else if (strncmp(rdm.sym, "_ZN", 3) == 0) { rdm.sym += 3; rdm.version = -1; // legacy } else if (strncmp(rdm.sym, "ZN", 2) == 0) { // On Windows, dbghelp strips leading underscores, so we accept "R..." // form too. rdm.sym += 2; rdm.version = -1; // legacy } else if (strncmp(rdm.sym, "__ZN", 4) == 0) { // On OSX, symbols are prefixed with an extra _ rdm.sym += 4; rdm.version = -1; // legacy } else { return false; } if (rdm.version != -1) { // Paths always start with uppercase characters. if (!IS_UPPER(rdm.sym[0])) return false; } // Rust symbols only use ASCII characters. for (const char *p = rdm.sym; *p; p++) { if ((*p & 0x80) != 0) return false; if (*p == '.' && strncmp(p, ".llvm.", 6) == 0) { // Ignore .llvm. suffixes break; } rdm.sym_len++; } if (rdm.version == -1) { demangle_legacy_path(&rdm); } else { demangle_path(&rdm, true); // Skip instantiating crate. if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' && peek(&rdm) <= 'Z') { rdm.skipping_printing = true; demangle_path(&rdm, false); } } if (!rdm.errored && (rdm.sym_len - rdm.next > 0)) { for (const char *p = rdm.sym + rdm.next; *p; p++) { // FIXME match is_symbol_like from rustc-demangle if (!((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9') || *p == '.')) { // Suffix is not a symbol like string return false; } } // Print LLVM produced suffix print_str(&rdm, rdm.sym + rdm.next, rdm.sym_len - rdm.next); } return !rdm.errored; } // Growable string buffers. struct str_buf { char *ptr; size_t len; size_t cap; bool errored; }; static void str_buf_reserve(struct str_buf *buf, size_t extra) { // Allocation failed before. if (buf->errored) return; size_t available = buf->cap - buf->len; if (extra <= available) return; size_t min_new_cap = buf->cap + (extra - available); // Check for overflows. if (min_new_cap < buf->cap) { buf->errored = true; return; } size_t new_cap = buf->cap; if (new_cap == 0) new_cap = 4; // Double capacity until sufficiently large. while (new_cap < min_new_cap) { new_cap *= 2; // Check for overflows. if (new_cap < buf->cap) { buf->errored = true; return; } } char *new_ptr = (char *)realloc(buf->ptr, new_cap); if (new_ptr == NULL) { free(buf->ptr); buf->ptr = NULL; buf->len = 0; buf->cap = 0; buf->errored = true; } else { buf->ptr = new_ptr; buf->cap = new_cap; } } static void str_buf_append(struct str_buf *buf, const char *data, size_t len) { str_buf_reserve(buf, len); if (buf->errored) return; memcpy(buf->ptr + buf->len, data, len); buf->len += len; } static void str_buf_demangle_callback(const char *data, size_t len, void *opaque) { str_buf_append(opaque, data, len); } char *sysprof_rust_demangle(const char *mangled, int flags) { struct str_buf out; char *ret; out.ptr = NULL; out.len = 0; out.cap = 0; out.errored = false; bool success = sysprof_rust_demangle_with_callback( mangled, flags, str_buf_demangle_callback, &out ); if (!success) { free(out.ptr); return NULL; } str_buf_append(&out, "\0", 1); ret = g_strdup(out.ptr); free(out.ptr); return ret; } #pragma GCC diagnostic pop