Update the vendored version of rust-demangle.c

https://github.com/LykenSol/rust-demangle.c/pull/2 has been merged.
The main change relative to the version of the PR that was originally
vendored is new support for decoding and escaping UTF-8 string literals.
In addition, clang-format was used to format the file.
This commit is contained in:
bjorn3
2023-09-26 19:02:57 +02:00
parent 467fe307f5
commit 242acd4489
2 changed files with 152 additions and 111 deletions

View File

@ -1,7 +1,8 @@
/* /*
Imported from https://github.com/LykenSol/rust-demangle.c/pull/2 commit ea6fddfbf526700ee989336d9ff78797e38365eb Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
Modifications from upstream: Modifications from upstream:
* Add sysprof_ prefix to exported symbols * Add sysprof_ prefix to exported symbols
* Use g_strdup in sysprof_rust_demangle
*/ */
#pragma GCC diagnostic push #pragma GCC diagnostic push
@ -79,6 +80,54 @@ static char next(struct rust_demangler *rdm) {
return c; return c;
} }
// A run of hex digits ("nibbles") located inside the symbol being demangled.
// The struct does not own the characters; it only records where they are.
struct hex_nibbles {
// First hex digit of the run. parse_hex_nibbles sets this to an address
// inside `rdm->sym` on success and leaves it NULL when parsing fails.
const char *nibbles;
// Number of hex digits in the run (0 when parsing fails).
size_t nibbles_len;
};
static struct hex_nibbles parse_hex_nibbles(struct rust_demangler *rdm) {
struct hex_nibbles hex;
hex.nibbles = NULL;
hex.nibbles_len = 0;
size_t start = rdm->next, hex_len = 0;
while (!eat(rdm, '_')) {
char c = next(rdm);
CHECK_OR(IS_DIGIT(c) || (c >= 'a' && c <= 'f'), return hex);
hex_len++;
}
hex.nibbles = rdm->sym + start;
hex.nibbles_len = hex_len;
return hex;
}
static struct hex_nibbles
parse_hex_nibbles_for_const_uint(struct rust_demangler *rdm) {
struct hex_nibbles hex = parse_hex_nibbles(rdm);
CHECK_OR(!rdm->errored, return hex);
// Trim leading `0`s.
while (hex.nibbles_len > 0 && *hex.nibbles == '0') {
hex.nibbles++;
hex.nibbles_len--;
}
return hex;
}
static struct hex_nibbles
parse_hex_nibbles_for_const_bytes(struct rust_demangler *rdm) {
struct hex_nibbles hex = parse_hex_nibbles(rdm);
CHECK_OR(!rdm->errored && (hex.nibbles_len % 2 == 0), return hex);
return hex;
}
// Convert one hex digit character into its numeric value.
//
// Only `0`-`9` and lowercase `a`-`f` give meaningful results; no validation
// is done here, the input is expected to have been checked already.
static uint8_t decode_hex_nibble(char nibble) {
    if (nibble < 'a')
        return (uint8_t)(nibble - '0');
    return (uint8_t)(nibble - 'a' + 10);
}
static uint64_t parse_integer_62(struct rust_demangler *rdm) { static uint64_t parse_integer_62(struct rust_demangler *rdm) {
if (eat(rdm, '_')) if (eat(rdm, '_'))
return 0; return 0;
@ -191,7 +240,10 @@ static void print_uint64_hex(struct rust_demangler *rdm, uint64_t x) {
PRINT(s); PRINT(s);
} }
static void print_quoted_escaped_char(struct rust_demangler *rdm, char quote, uint32_t c) { static void
print_quoted_escaped_char(struct rust_demangler *rdm, char quote, uint32_t c) {
CHECK_OR(c < 0xd800 || (c > 0xdfff && c < 0x10ffff), return);
switch (c) { switch (c) {
case '\0': case '\0':
PRINT("\\0"); PRINT("\\0");
@ -830,55 +882,24 @@ static void demangle_const(struct rust_demangler *rdm, bool in_value) {
break; break;
case 'b': { case 'b': {
uint64_t value = 0; struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm);
size_t hex_len = 0; CHECK_OR(!rdm->errored && hex.nibbles_len <= 1, return);
while (!eat(rdm, '_')) { uint8_t v = hex.nibbles_len > 0 ? decode_hex_nibble(hex.nibbles[0]) : 0;
value <<= 4; CHECK_OR(v <= 1, return);
PRINT(v == 1 ? "true" : "false");
char c = next(rdm);
if (IS_DIGIT(c))
value |= c - '0';
else if (c >= 'a' && c <= 'f')
value |= 10 + (c - 'a');
else
ERROR_AND(return );
hex_len++;
}
if (value == 0) {
PRINT("false");
} else if (value == 1) {
PRINT("true");
} else {
ERROR_AND(return );
}
break; break;
} }
case 'c': { case 'c': {
uint64_t value = 0; struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm);
size_t hex_len = 0; CHECK_OR(!rdm->errored && hex.nibbles_len <= 6, return);
while (!eat(rdm, '_')) {
value <<= 4;
char c = next(rdm); uint32_t c = 0;
if (IS_DIGIT(c)) for (size_t i = 0; i < hex.nibbles_len; i++)
value |= c - '0'; c = (c << 4) | decode_hex_nibble(hex.nibbles[i]);
else if (c >= 'a' && c <= 'f')
value |= 10 + (c - 'a');
else
ERROR_AND(return );
hex_len++;
}
if (value >= 0x10FFFF)
ERROR_AND(return );
if (value >= 0xD800 && value <= 0xDFFF)
ERROR_AND(return );
PRINT("'"); PRINT("'");
print_quoted_escaped_char(rdm, '\'', value); print_quoted_escaped_char(rdm, '\'', c);
PRINT("'"); PRINT("'");
break; break;
@ -1056,64 +1077,83 @@ static void demangle_const(struct rust_demangler *rdm, bool in_value) {
static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag) { static void demangle_const_uint(struct rust_demangler *rdm, char ty_tag) {
CHECK_OR(!rdm->errored, return); CHECK_OR(!rdm->errored, return);
uint64_t value = 0; struct hex_nibbles hex = parse_hex_nibbles_for_const_uint(rdm);
size_t hex_len = 0; CHECK_OR(!rdm->errored, return);
while (!eat(rdm, '_')) {
value <<= 4;
char c = next(rdm);
if (IS_DIGIT(c))
value |= c - '0';
else if (c >= 'a' && c <= 'f')
value |= 10 + (c - 'a');
else
ERROR_AND(return );
hex_len++;
}
// Print anything that doesn't fit in `uint64_t` verbatim. // Print anything that doesn't fit in `uint64_t` verbatim.
if (hex_len > 16) { if (hex.nibbles_len > 16) {
PRINT("0x"); PRINT("0x");
print_str(rdm, rdm->sym + (rdm->next - hex_len - 1), hex_len); print_str(rdm, hex.nibbles, hex.nibbles_len);
} else { } else {
print_uint64(rdm, value); uint64_t v = 0;
for (size_t i = 0; i < hex.nibbles_len; i++)
v = (v << 4) | decode_hex_nibble(hex.nibbles[i]);
print_uint64(rdm, v);
} }
if (rdm->verbose) if (rdm->verbose)
PRINT(basic_type(ty_tag)); PRINT(basic_type(ty_tag));
} }
// Every UTF-8 byte starts with a unary "length" prefix: a run of `1` bits
// terminated by a `0` bit. The bits after that prefix are payload.
struct utf8_byte {
    // Number of leading `1` bits found in the byte. Special cases:
    // - `0`: the byte is a lone ASCII byte
    // - `1`: the byte is a continuation byte (cannot start a sequence)
    size_t seq_len;

    // The byte with its length prefix masked off; these `payload_width` bits
    // contribute to the Unicode scalar value encoded by the sequence.
    uint8_t payload;
    size_t payload_width;
};

// Split a single byte into its UTF-8 length prefix and payload bits.
//
// At most the top seven bits are examined as prefix, so `seq_len` is capped
// at 7 and `payload_width` never drops below 1.
static struct utf8_byte utf8_decode(uint8_t byte) {
    struct utf8_byte out = {.seq_len = 0, .payload = byte, .payload_width = 8};

    // Walk the bits from the most significant one downwards, stopping after
    // the first `0` bit (which terminates the prefix and is not payload).
    for (uint8_t mask = 0x80; mask > 0x01; mask >>= 1) {
        out.payload = (uint8_t)(out.payload & ~mask);
        out.payload_width -= 1;
        if (!(byte & mask))
            return out;
        out.seq_len += 1;
    }
    return out;
}
static void demangle_const_str_literal(struct rust_demangler *rdm) { static void demangle_const_str_literal(struct rust_demangler *rdm) {
CHECK_OR(!rdm->errored, return); CHECK_OR(!rdm->errored, return);
struct hex_nibbles hex = parse_hex_nibbles_for_const_bytes(rdm);
CHECK_OR(!rdm->errored, return);
PRINT("\""); PRINT("\"");
for (size_t i = 0; i < hex.nibbles_len; i += 2) {
// FIXME(bjorn3) actually decode UTF-8 strings into individual characters struct utf8_byte utf8 = utf8_decode(
while (!eat(rdm, '_')) { (decode_hex_nibble(hex.nibbles[i]) << 4) |
uint32_t value = 0; decode_hex_nibble(hex.nibbles[i + 1])
);
char c = next(rdm); uint32_t c = utf8.payload;
if (IS_DIGIT(c)) if (utf8.seq_len > 0) {
value |= c - '0'; CHECK_OR(utf8.seq_len >= 2 && utf8.seq_len <= 4, return);
else if (c >= 'a' && c <= 'f') for (size_t extra = utf8.seq_len - 1; extra > 0; extra--) {
value |= 10 + (c - 'a'); i += 2;
else utf8 = utf8_decode(
ERROR_AND(return ); (decode_hex_nibble(hex.nibbles[i]) << 4) |
decode_hex_nibble(hex.nibbles[i + 1])
value <<= 4; );
CHECK_OR(utf8.seq_len == 1, return);
c = next(rdm); c = (c << utf8.payload_width) | utf8.payload;
if (IS_DIGIT(c)) }
value |= c - '0'; }
else if (c >= 'a' && c <= 'f') print_quoted_escaped_char(rdm, '"', c);
value |= 10 + (c - 'a');
else
ERROR_AND(return );
print_quoted_escaped_char(rdm, '"', value);
} }
PRINT("\""); PRINT("\"");
} }
@ -1169,7 +1209,8 @@ bool sysprof_rust_demangle_with_callback(
demangle_path(&rdm, true); demangle_path(&rdm, true);
// Skip instantiating crate. // Skip instantiating crate.
if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' && peek(&rdm) <= 'Z') { if (!rdm.errored && rdm.next < rdm.sym_len && peek(&rdm) >= 'A' &&
peek(&rdm) <= 'Z') {
rdm.skipping_printing = true; rdm.skipping_printing = true;
demangle_path(&rdm, false); demangle_path(&rdm, false);
} }

View File

@ -1,5 +1,5 @@
/* /*
Imported from https://github.com/LykenSol/rust-demangle.c commit eed29f57732ddb2be434ec89f8ede9b695e5e157 Imported from https://github.com/LykenSol/rust-demangle.c commit 4b3529ee0060d318c60459fbe9a0d95fff82e74f
Modifications from upstream: Modifications from upstream:
* Add sysprof_ prefix to exported symbols and mark them as hidden * Add sysprof_ prefix to exported symbols and mark them as hidden
* Add pragma once * Add pragma once