From 1b72901c4ddd3101d9035a362da9d9220bd18543 Mon Sep 17 00:00:00 2001 From: Soren Sandmann Date: Mon, 21 Aug 2006 00:18:10 +0000 Subject: [PATCH] New function 2006-08-20 Soren Sandmann * elfparser.c (elf_parser_get_debug_link): New function * elfparser.c: Delete SymbolTable typedef * binparser.[ch] (bin_parser_get_data): New function (bin_parser_get_length): New function * elfparser.[ch] (elf_parser_get_crc32): New function * TODO: Updates * elfparser.c (elf_parser_lookup_symbol): Offset passed in addresses by the load address. * elfparser.c (elf_parser_get_load_address): New function to compute the load address of the beginning of the file. --- ChangeLog | 19 +++++ TODO | 41 ++++++++++- binparser.c | 14 +++- binparser.h | 18 +++-- elfparser.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++------ elfparser.h | 2 +- testelf.c | 18 +++-- 7 files changed, 279 insertions(+), 39 deletions(-) diff --git a/ChangeLog b/ChangeLog index a92e696d..c294a6ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2006-08-20 Soren Sandmann + + * elfparser.c (elf_parser_get_debug_link): New function + + * elfparser.c: Delete SymbolTable typedef + + * binparser.[ch] (bin_parser_get_data): New function + (bin_parser_get_length): New function + + * elfparser.[ch] (elf_parser_get_crc32): New function + + * TODO: Updates + + * elfparser.c (elf_parser_lookup_symbol): Offset passed in + addresses by the load address. + + * elfparser.c (elf_parser_get_load_address): New function to + compute the load address of the beginning of the file. + 2006-08-20 Soren Sandmann * elfparser.c (elf_sym_get_name): Read the name out of the file diff --git a/TODO b/TODO index 27cbdcde..b779aed6 100644 --- a/TODO +++ b/TODO @@ -32,6 +32,41 @@ Before 1.0.4: Before 1.2: +* Rethink binparser. 
Maybe the default mode should be: + - there is a current offset + - you can move the cursor + - _goto() + - _align() + - you can read structs with "begin_struct (format) / end_struct()" + Or maybe just "set_format()" that would accept NULL? + - when you are reading a struct, you can skip records with _index() + - you can read fields with get_string/get_uint by passing a name. + - you can read anonymous strings and uints by passing NULL for name + This is allowed even when not reading structs. Or maybe this + should be separate functions. Advantages: + - they can skip ahead, unlike field accessors + - you can access specific types (8,16,32,64) + - there is no "name" field + Disadvantage: + - the field accessors would need renaming. + bin_parser_get_uint_field () + is not really that bad though. + Maybe begin_record() could return a structure you could + use to access that particular record? Would nicely solve + the problems with "goto" and "index". + bin_record_get_uint(); + What should begin/end be called? They will have different + objects passed. + bin_parser_get_record (parser) -> record + bin_record_free (record); + - Maybe support for indirect strings? I.e., get_string() in elfparser + - This will require endianness to be a per-parser property. Which is + probably just fine. Although d-bus actually has + per-message endianness. Maybe there could be a settable + "endianness" property. + + Also need to add error checking. + * Rename stack_stash_foreach_by_address() to stack_stash_foreach_unique(), or maybe not ... @@ -342,9 +377,6 @@ http://www.linuxbase.org/spec/booksets/LSB-Embedded/LSB-Embedded/ehframe.html - possibly add dependency on glib 2.8 if it is released at that point. (g_file_replace()) -- somehow get access to VSEnterprise profiler and see how it works. - somehow get access to vtune and see how it works.
- * Some notes about timer interrupt handling in Linux On an SMP system APIC is used - the interesting file is arch/i386/kernel/apic.c @@ -368,6 +400,9 @@ When the interrupt happens, Later: +- somehow get access to VSEnterprise profiler and see how it works. + somehow get access to vtune and see how it works. + - On SMP systems interrupts happen unpredictably, including when another one is running. Right now we are ignoring any interrupts that happen when another one is running, but we should probably just save the data diff --git a/binparser.c b/binparser.c index 65273239..a37a3028 100644 --- a/binparser.c +++ b/binparser.c @@ -333,9 +333,21 @@ bin_parser_get_offset (BinParser *parser) return parser->frame->offset; } +const guchar * +bin_parser_get_data (BinParser *parser) +{ + return parser->data; +} + +gsize +bin_parser_get_length (BinParser *parser) +{ + return parser->length; +} + BinField * bin_field_new_fixed_array (int n_elements, - int element_size) + int element_size) { BinField *field = g_new0 (BinField, 1); field->width = n_elements * element_size; diff --git a/binparser.h b/binparser.h index 26a545e9..f4583980 100644 --- a/binparser.h +++ b/binparser.h @@ -3,13 +3,13 @@ typedef struct BinField BinField; typedef struct BinFormat BinFormat; typedef struct BinParser BinParser; + +/* BinParser */ BinParser *bin_parser_new (const guchar *data, gsize length); -BinFormat *bin_format_new (gboolean big_endian, - const char *name, BinField *field, - ...); +const guchar *bin_parser_get_data (BinParser *parser); +gsize bin_parser_get_length (BinParser *parser); gsize bin_parser_get_offset (BinParser *parser); -gsize bin_format_get_size (BinFormat *format); void bin_parser_index (BinParser *parser, int index); void bin_parser_begin (BinParser *parser, BinFormat *format, @@ -18,9 +18,17 @@ void bin_parser_end (BinParser *parser); const char *bin_parser_get_string (BinParser *parser); guint64 bin_parser_get_uint (BinParser *parser, const gchar *name); + +/* BinFormat 
*/ +BinFormat *bin_format_new (gboolean big_endian, + const char *name, BinField *field, + ...); +gsize bin_format_get_size (BinFormat *format); + +/* BinField */ BinField *bin_field_new_uint8 (void); BinField *bin_field_new_uint16 (void); BinField *bin_field_new_uint32 (void); BinField *bin_field_new_uint64 (void); BinField *bin_field_new_fixed_array (int n_elements, - int element_size); + int element_size); diff --git a/elfparser.c b/elfparser.c index 6bad324c..a69d4c60 100644 --- a/elfparser.c +++ b/elfparser.c @@ -4,13 +4,8 @@ #include "binparser.h" #include "elfparser.h" -typedef struct SymbolTable SymbolTable; typedef struct Section Section; -struct SymbolTable -{ -}; - struct ElfSym { gulong offset; @@ -22,23 +17,25 @@ struct Section const gchar * name; gsize offset; gsize size; + gboolean allocated; + gulong load_address; }; struct ElfParser { - BinParser * parser; + BinParser * parser; - BinFormat * header; - BinFormat * strtab_format; - BinFormat * shn_entry; - BinFormat * sym_format; + BinFormat * header; + BinFormat * strtab_format; + BinFormat * shn_entry; + BinFormat * sym_format; - int n_sections; - Section ** sections; + int n_sections; + Section ** sections; - int n_symbols; - ElfSym * symbols; - gsize sym_strings; + int n_symbols; + ElfSym * symbols; + gsize sym_strings; }; static gboolean parse_elf_signature (const guchar *data, gsize length, @@ -72,11 +69,20 @@ section_new (ElfParser *parser, { BinParser *bparser = parser->parser; Section *section = g_new (Section, 1); + guint64 flags; section->name = get_string (bparser, name_table, "sh_name"); section->size = bin_parser_get_uint (bparser, "sh_size"); section->offset = bin_parser_get_uint (bparser, "sh_offset"); + flags = bin_parser_get_uint (bparser, "sh_flags"); + section->allocated = !!(flags & SHF_ALLOC); + + if (section->allocated) + section->load_address = bin_parser_get_uint (bparser, "sh_addr"); + else + section->load_address = 0; + return section; } @@ -158,6 +164,70 @@ elf_parser_new 
(const guchar *data, gsize length) return parser; } +guint32 +elf_parser_get_crc32 (ElfParser *parser) +{ + static const unsigned long crc32_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 
0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d + }; + const guchar *data; + gsize length; + gulong crc; + gsize i; + + data = bin_parser_get_data (parser->parser); + length = bin_parser_get_length (parser->parser); + + crc = 0; + + for (i = 0; i < length; ++i) + crc = crc32_table[(crc ^ data[i]) & 0xff] ^ (crc >> 8); + + return crc & 0xFFFFFFFF; +} + void elf_parser_free (ElfParser *parser) { @@ -292,8 +362,37 @@ read_symbols (ElfParser *parser) } } +/* + * Returns the address that the start of the file would be loaded + * at if the whole file was mapped + */ +static gulong +elf_parser_get_load_address (ElfParser *parser) +{ + int i; + gulong load_address = (gulong)-1; + + for (i = 0; i < parser->n_sections; ++i) + { 
+ Section *section = parser->sections[i]; + + if (section->allocated) + { + gulong addr = section->load_address - section->offset; + load_address = MIN (load_address, addr); + } + } + + g_print ("load address is: %8p\n", (void *)load_address); + + return load_address; +} + static ElfSym * -do_lookup (ElfSym *symbols, gulong address, int first, int last) +do_lookup (ElfSym *symbols, + gulong address, + int first, + int last) { if (address >= symbols[last].address) { @@ -316,13 +415,9 @@ do_lookup (ElfSym *symbols, gulong address, int first, int last) int mid = (first + last) / 2; if (symbols[mid].address > address) - { return do_lookup (symbols, address, first, mid); - } else - { return do_lookup (symbols, address, mid, last); - } } } @@ -336,8 +431,15 @@ elf_parser_lookup_symbol (ElfParser *parser, if (parser->n_symbols == 0) return NULL; - /* FIXME: we should offset address based on the files load address */ + address += elf_parser_get_load_address (parser); + +#if 0 + g_print ("the address we are looking up is %p\n", address); +#endif + /* FIXME: we should look at the symbol size and check if the + * address is actually within the function. + */ return do_lookup (parser->symbols, address, 0, parser->n_symbols - 1); } @@ -363,6 +465,68 @@ parser_from_sym (const ElfSym *sym) return NULL; } +const char * +elf_parser_get_debug_link (ElfParser *parser, guint32 *crc32) +{ + const Section *debug_link = find_section (parser, ".gnu_debuglink"); + const gchar *result; + gsize crc_offset; + + if (!debug_link) + return NULL; + + bin_parser_begin (parser->parser, NULL, debug_link->offset); + result = bin_parser_get_string (parser->parser); + bin_parser_end (parser->parser); + + crc_offset = strlen (result) + 1; + crc_offset = (crc_offset + 3) & ~3; + + /* FIXME: This is broken for two reasons: + * + * (1) It assumes file_endian==machine_endian + * + * (2) It doesn't check for file overrun. 
+ * + * The fix is to make binparser capable of dealing with stuff + * outside of records. + */ + + *crc32 = *(guint32 *)(result + crc_offset); + return result; +} + +#if 0 +get_debug_link_info (bfd *abfd, unsigned long *crc32_out) +{ + asection *sect; + bfd_size_type debuglink_size; + unsigned long crc32; + char *contents; + int crc_offset; + + sect = bfd_get_section_by_name (abfd, ".gnu_debuglink"); + + if (sect == NULL) + return NULL; + + debuglink_size = bfd_section_size (abfd, sect); + + contents = g_malloc (debuglink_size); + bfd_get_section_contents (abfd, sect, contents, + (file_ptr)0, (bfd_size_type)debuglink_size); + + /* Crc value is stored after the filename, aligned up to 4 bytes. */ + crc_offset = strlen (contents) + 1; + crc_offset = (crc_offset + 3) & ~3; + + crc32 = bfd_get_32 (abfd, (bfd_byte *) (contents + crc_offset)); + + *crc32_out = crc32; + return contents; +} +#endif + const char * elf_sym_get_name (const ElfSym *sym) { diff --git a/elfparser.h b/elfparser.h index 15da42f6..daaca11d 100644 --- a/elfparser.h +++ b/elfparser.h @@ -21,7 +21,7 @@ void elf_parser_free (ElfParser *parser); */ const ElfSym *elf_parser_lookup_symbol (ElfParser *parser, gulong address); - +guint32 elf_parser_get_crc32 (ElfParser *parser); const char *elf_sym_get_name (const ElfSym *sym); gulong elf_sym_get_address (const ElfSym *sym); char *elf_demangle (const char *name); diff --git a/testelf.c b/testelf.c index d4b0d46f..7c27db41 100644 --- a/testelf.c +++ b/testelf.c @@ -7,25 +7,26 @@ static void check (ElfParser *elf, gulong addr) { const ElfSym *sym = elf_parser_lookup_symbol (elf, addr); + + if (!sym) + { + g_print ("not found\n"); + return; + } + n = elf_sym_get_name (sym); -#if 0 g_print ("%p => ", (void *)addr); -#endif if (sym) { -#if 0 g_print ("found: %s (%p)\n", elf_sym_get_name (sym), (void *)elf_sym_get_address (sym)); -#endif } else { -#if 0 g_print ("not found\n"); -#endif } } @@ -42,8 +43,9 @@ main () for (i = 0; i < 5000000; ++i) { - check (elf, 
0x3e7ef20); /* gtk_handle_box_end_drag */ - check (elf, 0x3e7ef25); /* same (but in the middle of the function */ + check (elf, 0x077c80f0 - (0x07787000 - 0)); /* gtk_about_dialog_set_artists (add - (map - offset)) */ + + check (elf, 0x077c80f0 - (0x07787000 - 0)); /* same (but in the middle of the function */ } return 0; }