Many cleanups.

2006-08-19  Soren Sandmann  <sandmann@redhat.com>

	* elfparser.c: Many cleanups.
This commit is contained in:
Soren Sandmann
2006-08-19 23:56:18 +00:00
committed by Søren Sandmann Pedersen
parent d3c6a66101
commit 8967b3148c
2 changed files with 346 additions and 435 deletions

View File

@ -1,6 +1,6 @@
2006-08-16 Søren Sandmann <ssp@localhost.localdomain> 2006-08-19 Soren Sandmann <sandmann@redhat.com>
* elfparser.c (check_symbol): Pass the correct pointer. * elfparser.c: Many cleanups.
2006-08-16 Soren Sandmann <sandmann@redhat.com> 2006-08-16 Soren Sandmann <sandmann@redhat.com>

View File

@ -4,6 +4,7 @@
#include "elfparser.h" #include "elfparser.h"
typedef struct SymbolTable SymbolTable; typedef struct SymbolTable SymbolTable;
typedef struct Section Section;
struct SymbolTable struct SymbolTable
{ {
@ -13,184 +14,86 @@ struct ElfSym
{ {
}; };
struct ElfParser struct Section
{ {
BinParser *parser; const gchar * name;
BinFormat *header; gsize offset;
BinFormat *strtab_format; gsize size;
BinFormat *shn_entry;
BinFormat *sym_format;
gsize strtab_offset;
gsize str_table;
}; };
/* State kept for one parsed ELF image. */
struct ElfParser
{
/* Binary parser that every bin_parser_* call in this file operates on. */
BinParser * parser;
/* Format used at offset 0 to read e_shnum, e_shstrndx and e_shoff. */
BinFormat * header;
/* Not referenced by the code visible here -- presumably set up by
 * make_formats(); TODO confirm.
 */
BinFormat * strtab_format;
/* Format of one section header entry (sh_name, sh_size, sh_offset). */
BinFormat * shn_entry;
/* Format of one symbol table entry (st_name, st_info, st_value). */
BinFormat * sym_format;
/* Number of section headers, read from e_shnum. */
int n_sections;
/* Array of n_sections Section pointers, filled in by elf_parser_new()
 * and released by elf_parser_free().
 */
Section ** sections;
};
static gboolean parse_elf_signature (const guchar *data, gsize length,
gboolean *is_64, gboolean *is_be);
static void make_formats (ElfParser *parser, static void make_formats (ElfParser *parser,
gboolean is_64, gboolean is_64,
gboolean is_big_endian); gboolean is_big_endian);
#if 0
BinFormat *str_tab = bin_field_new_string ("string", make_string());
gsize offset = find_it();
#endif
#if 0
static void
parse_elf (const guchar *data,
gsize length)
{
gboolean is_64, is_big_endian;
BinFormat *elf_header;
BinFormat *shn_entry;
BinParser *parser;
BinParser *sh_parser;
BinFormat *sym;
int i;
find_elf_type (data, length, &is_64, &is_big_endian);
parser = bin_parser_new (data, length);
bin_parser_begin (parser, elf_header, 0);
g_print ("section header offset: %u\n",
bin_parser_get_uint ("e_shoff"));
g_print ("There are %llu sections\n",
bin_parser_get_uint (parser, "e_shnum"));
}
#endif
static const char * static const char *
elf_lookup_string (ElfParser *parser, int offset) get_string (BinParser *parser,
gsize table,
const char *name)
{ {
const char *result; const char *result = NULL;
gsize index;
/* This function has a misleading name. In reality index = bin_parser_get_uint (parser, name);
* it only looks up in the section header table
*/
bin_parser_begin (parser->parser, bin_parser_begin (parser, NULL, table + index);
NULL, parser->strtab_offset + offset);
result = bin_parser_get_string (parser->parser); result = bin_parser_get_string (parser);
bin_parser_end (parser->parser); bin_parser_end (parser);
return result; return result;
} }
static gboolean static Section *
find_elf_type (const guchar *data, gsize length, section_new (ElfParser *parser,
gboolean *is_64, gboolean *is_be) gsize name_table)
{ {
/* FIXME: this function should be able to return an error */ BinParser *bparser = parser->parser;
if (length < EI_NIDENT) Section *section = g_new (Section, 1);
return FALSE;
/* 32 or 64? */ section->name = get_string (bparser, name_table, "sh_name");
if (data[EI_CLASS] == ELFCLASS32) section->size = bin_parser_get_uint (bparser, "sh_size");
{ section->offset = bin_parser_get_uint (bparser, "sh_offset");
*is_64 = FALSE;
}
else if (data[EI_CLASS] == ELFCLASS64)
{
*is_64 = TRUE;
}
else
{
/* FIXME: set_error */
return FALSE;
}
/* big or little endian? */ return section;
if (data[EI_DATA] == ELFDATA2LSB)
{
*is_be = FALSE;
}
else if (data[EI_DATA] == ELFDATA2MSB)
{
*is_be = TRUE;
}
else
{
/* FIXME: set error */
return FALSE;
}
#if 0
g_print ("This elf file is %s %s\n",
*is_64? "64 bit" : "32 bit",
*is_be? "big endiann" : "little endian");
#endif
return TRUE;
}
static BinField *
make_word (gboolean is_64)
{
if (is_64)
return bin_field_new_uint64 ();
else
return bin_field_new_uint32 ();
} }
static void static void
dump_symbol_table (ElfParser *parser, section_free (Section *section)
gsize offset, {
gsize size) g_free (section);
}
static const Section *
find_section (ElfParser *parser,
const char *name)
{ {
int i; int i;
if (!parser->str_table)
for (i = 0; i < parser->n_sections; ++i)
{ {
g_print ("no string table\n"); Section *section = parser->sections[i];
return;
if (strcmp (section->name, name) == 0)
return section;
} }
#if 0 return NULL;
g_print ("dumping symbol table at %d\n", offset);
#endif
bin_parser_begin (parser->parser, parser->sym_format, offset);
for (i = 0; i < 2000; ++i)
{
guint64 idx;
bin_parser_index (parser->parser, i);
idx = bin_parser_get_uint (parser->parser, "st_name");
const char *result;
gsize size;
gulong addr;
guint info;
#if 0
g_print ("addr: %p\n", bin_parser_get_address (parser->parser, "st_name"));
#endif
#if 0
g_print ("idx: %d\n", idx);
#endif
size = bin_parser_get_uint (parser->parser, "st_size");
info = bin_parser_get_uint (parser->parser, "st_info");
if (info == STT_FUNC)
{
addr = bin_parser_get_uint (parser->parser, "st_value");
bin_parser_begin (parser->parser,
NULL, parser->str_table + idx);
result = bin_parser_get_string (parser->parser);
bin_parser_end (parser->parser);
g_print ("%d %p: symbol: size: %d, %s\n",
i, (void *)addr, size, result);
}
}
bin_parser_end (parser->parser);
} }
ElfParser * ElfParser *
@ -198,11 +101,12 @@ elf_parser_new (const guchar *data, gsize length)
{ {
ElfParser *parser; ElfParser *parser;
gboolean is_64, is_big_endian; gboolean is_64, is_big_endian;
int n_sections; int section_names_idx;
int section_name_table; gsize section_names;
gsize section_headers;
int i; int i;
if (!find_elf_type (data, length, &is_64, &is_big_endian)) if (!parse_elf_signature (data, length, &is_64, &is_big_endian))
{ {
/* FIXME: set error */ /* FIXME: set error */
return NULL; return NULL;
@ -214,189 +118,56 @@ elf_parser_new (const guchar *data, gsize length)
make_formats (parser, is_64, is_big_endian); make_formats (parser, is_64, is_big_endian);
/* Read ELF header */
bin_parser_begin (parser->parser, parser->header, 0); bin_parser_begin (parser->parser, parser->header, 0);
n_sections = parser->n_sections = bin_parser_get_uint (parser->parser, "e_shnum");
bin_parser_get_uint (parser->parser, "e_shnum"); section_names_idx = bin_parser_get_uint (parser->parser, "e_shstrndx");
section_headers = bin_parser_get_uint (parser->parser, "e_shoff");
section_name_table =
bin_parser_get_uint (parser->parser, "e_shstrndx");
bin_parser_begin (
parser->parser, parser->shn_entry,
bin_parser_get_uint (parser->parser, "e_shoff"));
bin_parser_index (parser->parser, section_name_table);
parser->strtab_offset =
bin_parser_get_uint (parser->parser, "sh_offset");
for (i = 0; i < n_sections; ++i)
{
const char *name;
int offset;
bin_parser_index (parser->parser, i);
offset = bin_parser_get_uint (parser->parser, "sh_name");
name = elf_lookup_string (parser, offset);
if (strcmp (name, ".strtab") == 0)
{
parser->str_table = bin_parser_get_uint (
parser->parser, "sh_offset");
}
}
for (i = 0; i < n_sections; ++i)
{
const char *name;
int offset;
const char *type;
bin_parser_index (parser->parser, i);
offset = bin_parser_get_uint (parser->parser, "sh_name");
name = elf_lookup_string (parser, offset);
switch (bin_parser_get_uint (parser->parser, "sh_type"))
{
case SHT_NULL:
type = "undefined";
break;
case SHT_PROGBITS:
type = "progbits";
break;
case SHT_SYMTAB:
type = "symbol table";
dump_symbol_table (
parser,
bin_parser_get_uint (parser->parser, "sh_offset"),
bin_parser_get_uint (parser->parser, "sh_size"));
break;
case SHT_STRTAB:
type = "string table";
break;
case SHT_RELA:
type = "relocations with explicit addends";
break;
case SHT_HASH:
type = "symbol hash table";
break;
case SHT_DYNAMIC:
type = "Information for dynamic linking";
break;
case SHT_NOTE:
type = "note";
break;
case SHT_NOBITS:
type = "nobits";
break;
case SHT_REL:
type = "relocations without explicit addends";
break;
case SHT_SHLIB:
type = "reserved with unspecified semantics";
break;
case SHT_DYNSYM:
type = "dynamic symbols";
break;
case SHT_LOPROC:
type = "loproc";
break;
case SHT_HIPROC:
type = "hiproc";
break;
case SHT_LOUSER:
type = "louser:";
break;
case SHT_HIUSER:
type = "hiuser";
break;
default:
type = "<unknown>";
break;
}
g_print ("%s [%s] (%d)\n", name, type, offset);
}
bin_parser_end (parser->parser); bin_parser_end (parser->parser);
/* Read section headers */
parser->sections = g_new0 (Section *, parser->n_sections);
bin_parser_begin (parser->parser, parser->shn_entry, section_headers);
bin_parser_index (parser->parser, section_names_idx);
section_names = bin_parser_get_uint (parser->parser, "sh_offset");
for (i = 0; i < parser->n_sections; ++i)
{
bin_parser_index (parser->parser, i);
parser->sections[i] = section_new (parser, section_names);
}
bin_parser_end (parser->parser); bin_parser_end (parser->parser);
return parser; return parser;
} }
static const char * void
get_string (BinParser *parser, elf_parser_free (ElfParser *parser)
gsize table,
gsize offset)
{ {
const char *result = NULL;
bin_parser_begin (parser, NULL, table + offset);
result = bin_parser_get_string (parser);
bin_parser_end (parser);
return result;
}
static gssize
find_section (ElfParser *parser,
const char *name)
{
int n_sections;
int section_name_table;
int section_headers_offset;
int section_name_table_offset;
BinParser *bparser = parser->parser;
int i; int i;
gssize result;
bin_parser_begin (parser->parser, parser->header, 0); for (i = 0; i < parser->n_sections; ++i)
section_free (parser->sections[i]);
g_free (parser->sections);
n_sections = bin_parser_get_uint (bparser, "e_shnum"); g_free (parser);
section_name_table = bin_parser_get_uint (bparser, "e_shstrndx");
section_headers_offset = bin_parser_get_uint (bparser, "e_shoff");
bin_parser_begin (bparser, parser->shn_entry, section_headers_offset);
bin_parser_index (bparser, section_name_table);
section_name_table_offset = bin_parser_get_uint (bparser, "sh_offset");
result = -1;
for (i = 0; i < n_sections; ++i)
{
const char *section_name;
gsize name_offset;
bin_parser_index (bparser, i);
name_offset = bin_parser_get_uint (bparser, "sh_name");
section_name = get_string (
bparser, section_name_table_offset, name_offset);
if (strcmp (section_name, name) == 0)
{
result = bin_parser_get_uint (bparser, "sh_offset");
goto out;
}
}
out:
bin_parser_end (bparser);
#if 0
g_print ("found %s at %d\n", name, result);
#endif
return result;
} }
/*
* Looking up symbols
*/
#if 0
static int lookup_function_symbol (ElfParser *parser,
int begin,
int end,
gulong address);
static gboolean static gboolean
check_symbol (ElfParser *parser, check_symbol (ElfParser *parser,
int index, int index,
@ -409,7 +180,57 @@ check_symbol (ElfParser *parser,
return FALSE; return FALSE;
} }
void static gboolean
is_function (ElfParser *parser, int index)
{
return FALSE;
}
/* Placeholder: ignores both arguments and always returns 0.
 * NOTE(review): presumably meant to return the address of the symbol at
 * `index` once implemented -- confirm.
 */
static gulong
get_address (ElfParser *parser, int index)
{
return 0;
}
/* Helper for lookup_function_symbol(): inspects the symbol at index
 * `current` and, if it is a function, either reports a match or continues
 * the search in one part of the [begin, end) range.
 *
 * Returns `current` when that symbol's address equals `address`, the
 * result of the recursive lookup_function_symbol() call when it does not,
 * and -1 when the symbol at `current` is not a function.
 *
 * NOTE(review): the callers test the result with `res > 0`, so a match at
 * index 0 would be treated as "not found" -- confirm whether `>= 0` was
 * intended.
 * NOTE(review): this code appears to sit inside an `#if 0` region and
 * relies on is_function()/get_address(), which are still stubs.
 */
static int
do_check (ElfParser *parser,
int begin,
int current,
int other,
int end,
gulong address)
{
/* Despite the names, `first` is the larger of the two indices and
 * `last` the smaller one.
 */
int first = current > other ? current : other;
int last = current > other ? other : current;
/* The invariant here is that nothing between first
* and last is a function
*/
if (is_function (parser, current))
{
gulong addr = get_address (parser, current);
if (addr == address)
{
return current;
}
else if (addr > address)
{
/* Symbol lies above the target: keep searching from `begin`. */
return lookup_function_symbol (
parser, begin, first, address);
}
else
{
/* Symbol lies below the target: keep searching towards `end`. */
return lookup_function_symbol (
parser, last, end, address);
}
}
return -1;
}
static int
lookup_function_symbol (ElfParser *parser, lookup_function_symbol (ElfParser *parser,
int begin, int begin,
int end, int end,
@ -424,68 +245,158 @@ lookup_function_symbol (ElfParser *parser,
for (i = 0; i < end - begin; ++i) for (i = 0; i < end - begin; ++i)
{ {
bin_parser_index (parser->parser, i); bin_parser_index (parser->parser, i);
if (check_symbol (parser, i, address))
return; if (is_function (parser, i) &&
get_address (parser, i == address))
{
return i;
}
} }
} }
else else
{ {
int mid1 = (end - begin) / 2; int mid1, mid2;
int mid2 = ((end - begin) / 2 - 1);
mid1 = mid2 = (end - begin) / 2;
while (mid1 >= begin && while (mid1 >= begin &&
mid2 < end) mid2 < end)
{ {
/* int res;
if mid1 is a function, res = do_check (parser, begin, mid1, mid2, end, address);
then check the address. if (res > 0)
if higher than input address, return res;
recurse on (begin, mid1).
else
recurse on (mid2 - 1, end)
res = do_check (parser, begin, mid2, mid1, end, address);
similar for mid2, only the other way around. if (res > 0)
return res;
of course, if one of them matches
*/
if (check_symbol (parser, mid1, address))
return;
if (check_symbol (parser, mid2, address))
return;
mid1--; mid1--;
mid2++; mid2++;
} }
} }
return -1;
}
#endif
#if 0
#define ELF32_ST_BIND(val) (((unsigned char) (val)) >> 4)
#define ELF32_ST_TYPE(val) ((val) & 0xf)
#define ELF32_ST_INFO(bind, type) (((bind) << 4) + ((type) & 0xf))
/* Both Elf32_Sym and Elf64_Sym use the same one-byte st_info field. */
#define ELF64_ST_BIND(val) ELF32_ST_BIND (val)
#define ELF64_ST_TYPE(val) ELF32_ST_TYPE (val)
#define ELF64_ST_INFO(bind, type) ELF32_ST_INFO ((bind), (type))
#endif
/*
 * Walk every entry of `sym_table` and print the address and name of each
 * function symbol; names are read from `str_table` via the entry's st_name.
 *
 * The number of entries is the section size divided by the size of one
 * symbol record. `address` is not consulted yet, and the function always
 * returns NULL.
 */
static ElfSym *
lookup_symbol (ElfParser     *parser,
               const Section *sym_table,
               const Section *str_table,
               gulong         address)
{
    BinParser *bparser = parser->parser;
    int n_symbols = sym_table->size / bin_format_get_size (parser->sym_format);
    int idx = 0;

    g_print ("\ndumping %d symbols from %s\n", n_symbols, sym_table->name);

    bin_parser_begin (bparser, parser->sym_format, sym_table->offset);

    while (idx < n_symbols)
    {
        const char *sym_name;
        guint sym_info;
        gulong sym_addr;

        bin_parser_index (bparser, idx);

        sym_name = get_string (bparser, str_table->offset, "st_name");
        sym_info = bin_parser_get_uint (bparser, "st_info");
        sym_addr = bin_parser_get_uint (bparser, "st_value");

        /* The low four bits of st_info hold the symbol type. */
        if ((sym_info & 0xf) == STT_FUNC)
            g_print ("symbol: %8lx, %s\n", sym_addr, sym_name);

        idx++;
    }

    bin_parser_end (bparser);

    return NULL;
}
const ElfSym * const ElfSym *
elf_parser_lookup_symbol (ElfParser *parser, elf_parser_lookup_symbol (ElfParser *parser,
gulong address) gulong address)
{ {
gssize symtab_offset = find_section (parser, ".symtab"); const Section *symtab = find_section (parser, ".symtab");
gssize strtab_offset = find_section (parser, ".strtab"); const Section *dynsym = find_section (parser, ".dynsym");
gssize dynsym_offset = find_section (parser, ".dynsym"); const Section *strtab = find_section (parser, ".strtab");
gssize dynstr_offset = find_section (parser, ".dynstr"); const Section *dynstr = find_section (parser, ".dynstr");
if (symtab_offset != -1 && strtab_offset != -1) if (strtab && symtab)
{ {
/* lookup in normal symbol table */ lookup_symbol (parser, symtab, strtab, address);
} }
if (dynsym_offset != -1 && dynstr_offset != -1) if (dynsym && dynstr)
{ {
/* lookup in dynsym table */ lookup_symbol (parser, dynsym, dynstr, address);
} }
g_print ("HELLO!!\n");
return NULL; return NULL;
} }
/*
* Utility functions
*/
/*
 * Inspect the ELF identification bytes at the start of `data` and report
 * the file's word size and byte order.
 *
 * data:   start of the candidate ELF image
 * length: number of bytes available at `data`
 * is_64:  if non-NULL, receives TRUE for ELFCLASS64, FALSE for ELFCLASS32
 * is_be:  if non-NULL, receives TRUE for ELFDATA2MSB, FALSE for ELFDATA2LSB
 *
 * Returns TRUE when at least EI_NIDENT bytes are available and both the
 * class and data-encoding bytes hold a recognised value; otherwise returns
 * FALSE and leaves the outputs untouched.
 *
 * NOTE: the ELF magic bytes themselves are not verified here.
 */
static gboolean
parse_elf_signature (const guchar *data,
                     gsize         length,
                     gboolean     *is_64,
                     gboolean     *is_be)
{
    /* FIXME: this function should be able to return an error */
    if (length < EI_NIDENT)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_CLASS] != ELFCLASS32 &&
        data[EI_CLASS] != ELFCLASS64)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_DATA] != ELFDATA2LSB &&
        data[EI_DATA] != ELFDATA2MSB)
    {
        /* FIXME set error */
        return FALSE;
    }

    /* EI_CLASS and EI_DATA are indices into the identification bytes, so
     * the values to compare are data[EI_CLASS] and data[EI_DATA]; comparing
     * the index constants themselves always yields FALSE.
     */
    if (is_64)
        *is_64 = (data[EI_CLASS] == ELFCLASS64);

    if (is_be)
        *is_be = (data[EI_DATA] == ELFDATA2MSB);

    return TRUE;
}
/*
 * Create a field of the word size selected by `is_64`:
 * a uint64 field when is_64 is true, a uint32 field otherwise.
 */
static BinField *
make_word (gboolean is_64)
{
    return is_64 ? bin_field_new_uint64 () : bin_field_new_uint32 ();
}
static void static void
make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian) make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian)
{ {