Add beginning of an ELF parser.

2006-08-15  Soren Sandmann  <sandmann@redhat.com>

	Add beginning of an ELF parser.

	* binparser.[ch]: New files
	* elfparser.[ch]: New files
	* testelf.c: New file
	* Makefile.am (testelf_SOURCES): Add new testelf program.
This commit is contained in:
Soren Sandmann
2006-08-16 03:31:10 +00:00
committed by Søren Sandmann Pedersen
parent bde3d8a537
commit 2bc2e3658d
6 changed files with 1037 additions and 0 deletions

View File

@ -1,3 +1,12 @@
2006-08-15 Soren Sandmann <sandmann@redhat.com>
Add beginning of an ELF parser.
* binparser.[ch]: New files
* elfparser.[ch]: New files
* testelf.c: New file
* Makefile.am (testelf_SOURCES): Add new testelf program.
Sat Aug 12 16:13:05 2006 Søren Sandmann <sandmann@redhat.com>
* module/sysprof-module.c: Make n_samples per-cpu. Add an atomic

View File

@ -3,6 +3,12 @@ DIST_SUBDIRS = module
bin_PROGRAMS = sysprof-text
noinst_PROGRAMS = testelf
testelf_SOURCES = testelf.c elfparser.c elfparser.h binparser.c binparser.h
testelf_CPPFLAGS = \
$(CORE_DEP_CFLAGS)
testelf_LDADD = $(CORE_DEP_LIBS)
if BUILD_GUI
bin_PROGRAMS += sysprof
endif

531
binparser.c Normal file
View File

@ -0,0 +1,531 @@
#include <string.h>
#include <stdlib.h>
#include <glib.h>
#include <stdarg.h>
#include "binparser.h"
/* One entry on a BinParser's stack of active frames.  Frames are pushed by
 * bin_parser_begin() and popped by bin_parser_end().
 */
typedef struct ParserFrame ParserFrame;
struct ParserFrame
{
/* Format used to resolve field names while this frame is on top.
 * Callers may pass NULL when only bin_parser_get_string() is needed. */
BinFormat * format;
/* Record index last selected with bin_parser_index(); starts at 0 */
int index;
/* Byte offset into BinParser.data of the currently selected record */
gsize offset;
/* The frame that was on top before this one was pushed (or NULL) */
ParserFrame * next;
};
/* Description of a single field.  Instances created by the bin_field_new_*()
 * functions carry only width and align; bin_format_new() fills in offset and
 * name in its own copies.
 */
struct BinField
{
/* Byte offset of the field from the start of a record */
guint64 offset;
/* Size of the field in bytes */
int width;
/* Alignment in bytes that bin_format_new() applies before placing the field */
int align;
/* Name used by get_field() to look the field up */
char * name;
};
/* Layout of one record type: an ordered array of named fields plus the
 * byte order used when decoding multi-byte integers.
 */
struct BinFormat
{
/* TRUE if integers are stored big endian, FALSE for little endian */
gboolean big_endian;
/* Number of entries in fields */
int n_fields;
/* Array of n_fields field descriptions, in declaration order */
BinField * fields;
};
/* Parser state: the buffer being decoded and a stack of frames that say
 * where in the buffer, and with which format, fields are currently read.
 */
struct BinParser
{
/* Top of the frame stack; NULL when no bin_parser_begin() is active */
ParserFrame * frame;
/* Start of the buffer being parsed (stored as passed in, not copied) */
const guchar * data;
/* Number of bytes available at data */
gsize length;
};
/* Allocate a parser over the @length bytes starting at @data.
 *
 * The buffer is referenced, not copied.  The returned parser has no active
 * frame; call bin_parser_begin() before reading anything from it.
 */
BinParser *
bin_parser_new (const guchar *data,
                gsize         length)
{
    BinParser *result;

    result = g_new0 (BinParser, 1);

    result->data = data;
    result->length = length;
    result->frame = NULL;

    return result;
}
/* Collect a NULL-terminated (name, field, name, field, ...) argument list
 * into a newly allocated queue.
 *
 * @name and @field are the first pair, which the caller received as named
 * parameters; the remaining items are pulled from @args until a NULL is
 * seen.  If @name is NULL the list is considered empty and @args is not
 * touched.  The caller owns the returned queue.
 */
static GQueue *
read_varargs (va_list     args,
              const char *name,
              BinField   *field)
{
    GQueue *pairs = g_queue_new ();
    gpointer item;

    if (name == NULL)
        return pairs;

    g_queue_push_tail (pairs, (gpointer)name);
    g_queue_push_tail (pairs, field);

    for (item = va_arg (args, gpointer);
         item != NULL;
         item = va_arg (args, gpointer))
    {
        g_queue_push_tail (pairs, item);
    }

    return pairs;
}
/* Round @offset up to the next multiple of @alignment.
 *
 * An @alignment of 1 or less means "no alignment requirement" and returns
 * @offset unchanged.  This also protects against the division by zero that
 * a zero alignment (for example from a zero-sized array element) would
 * otherwise cause.
 */
static guint64
align (guint64 offset, int alignment)
{
    guint64 remainder;

    if (alignment <= 1)
        return offset;

    remainder = offset % alignment;
    if (remainder != 0)
        offset += alignment - remainder;

    return offset;
}
/* Return the size in bytes of one record of @format, i.e. the distance
 * between consecutive records in an array.
 *
 * The end of the last field is rounded up to a multiple of the width of
 * the first field.  A format without fields has size 0.
 *
 * NOTE(review): padding to the first field's width matches the formats
 * defined so far when they are used as arrays; padding to the largest field
 * alignment would be the more general rule -- confirm before relying on
 * this for new formats.
 */
gsize
bin_format_get_size (BinFormat *format)
{
    const BinField *first_field;
    const BinField *last_field;
    guint64 end;

    g_return_val_if_fail (format != NULL, 0);

    /* Avoid indexing fields[-1] for an empty format */
    if (format->n_fields <= 0)
        return 0;

    first_field = &format->fields[0];
    last_field = &format->fields[format->n_fields - 1];

    end = last_field->offset + last_field->width;

    /* A zero-width first field cannot be used as an alignment */
    if (first_field->width <= 0)
        return end;

    return align (end, first_field->width);
}
/* Create a record format from a NULL-terminated list of (name, field) pairs.
 *
 * @big_endian: byte order used when decoding integers of this format.
 * @name, @field: the first pair; further pairs follow as varargs and the
 *                list must end with a NULL name.  Passing a NULL @name
 *                creates a format without fields.
 *
 * Fields are laid out in order, each aligned to its own alignment.  The
 * names are copied.  The BinField objects passed in are consumed: this
 * function frees them, so they must not be reused or freed by the caller.
 *
 * Returns a newly allocated format.
 */
BinFormat *
bin_format_new (gboolean    big_endian,
                const char *name, BinField *field,
                ...)
{
    BinFormat *format = g_new0 (BinFormat, 1);
    GQueue *queue;
    GList *list;
    guint64 offset = 0;
    int i = 0;
    va_list args;

    format->big_endian = big_endian;

    /* Build queue of (name, field) pairs.  read_varargs() allocates the
     * queue, so none is created here.
     */
    va_start (args, field);
    queue = read_varargs (args, name, field);
    va_end (args);

    g_assert (queue->length % 2 == 0);

    format->n_fields = queue->length / 2;
    format->fields = g_new0 (BinField, format->n_fields);

    for (list = queue->head; list != NULL; list = list->next->next)
    {
        const char *field_name = list->data;
        BinField *spec = list->next->data;
        BinField *dest = &format->fields[i++];

        offset = align (offset, spec->align);

        dest->name = g_strdup (field_name);
        dest->width = spec->width;
        dest->align = spec->align;
        dest->offset = offset;

        offset += spec->width;

        /* The format keeps its own copy; the caller's field is consumed */
        g_free (spec);
    }

    g_queue_free (queue);

    return format;
}
/* Find the field called @name in @format using a linear search.
 *
 * Returns the matching field, or NULL if the format has no such field.
 */
static const BinField *
get_field (BinFormat   *format,
           const gchar *name)
{
    int idx = 0;

    while (idx < format->n_fields)
    {
        BinField *candidate = &(format->fields[idx]);

        if (!strcmp (candidate->name, name))
            return candidate;

        idx++;
    }

    return NULL;
}
/* Read the unsigned integer field @name from the record selected by the
 * parser's current frame, converting from the format's byte order.
 *
 * Returns the field value widened to 64 bits.  Returns 0 if no frame is
 * active, if the frame has no format, or if the field does not lie entirely
 * inside the parser's buffer; returns (guint64)-1 if the format has no
 * field called @name.  Fields whose width is not 1, 2, 4 or 8 bytes are a
 * programming error.
 */
guint64
bin_parser_get_uint (BinParser   *parser,
                     const gchar *name)
{
    const BinField *field;
    const guchar *pos;
    BinFormat *format;
    gsize start;
    gsize available;

    g_return_val_if_fail (parser->frame != NULL, 0);

    format = parser->frame->format;
    g_return_val_if_fail (format != NULL, 0);

    field = get_field (format, name);
    g_return_val_if_fail (field != NULL, (guint64)-1);

    /* The field sits at frame offset + field offset, so both must be taken
     * into account.  The comparisons are written as subtractions so that
     * they cannot overflow.
     */
    start = parser->frame->offset;
    if (start > parser->length)
    {
        /* FIXME: generate error */
        return 0;
    }

    available = parser->length - start;
    if (field->width < 0 ||
        field->offset > available ||
        (guint64)field->width > available - field->offset)
    {
        /* FIXME: generate error */
        return 0;
    }

    pos = parser->data + start + field->offset;

    /* Fields in a file are not guaranteed to be aligned for the host, so
     * copy the bytes out instead of dereferencing a casted pointer.
     */
    switch (field->width)
    {
    case 1:
        return *pos;

    case 2:
    {
        guint16 r16;

        memcpy (&r16, pos, sizeof (r16));
        if (format->big_endian)
            r16 = GUINT16_FROM_BE (r16);
        else
            r16 = GUINT16_FROM_LE (r16);
        return r16;
    }

    case 4:
    {
        guint32 r32;

        memcpy (&r32, pos, sizeof (r32));
        if (format->big_endian)
            r32 = GUINT32_FROM_BE (r32);
        else
            r32 = GUINT32_FROM_LE (r32);
        return r32;
    }

    case 8:
    {
        guint64 r64;

        memcpy (&r64, pos, sizeof (r64));
        if (format->big_endian)
            r64 = GUINT64_FROM_BE (r64);
        else
            r64 = GUINT64_FROM_LE (r64);
        return r64;
    }
    }

    g_print ("width: %d\n", field->width);
    g_assert_not_reached();
    return 0;
}
/* Allocate a field describing an unsigned integer that is @width bytes
 * wide and aligned to its own width.  Offset and name are left zeroed.
 */
static BinField *
new_field_uint (int width)
{
    BinField *result = g_new0 (BinField, 1);

    result->align = width;
    result->width = width;

    return result;
}
/* Create a field description for a 1-byte unsigned integer. */
BinField *
bin_field_new_uint8 (void)
{
return new_field_uint (1);
}
/* Create a field description for a 2-byte unsigned integer, 2-byte aligned. */
BinField *
bin_field_new_uint16 (void)
{
return new_field_uint (2);
}
/* Create a field description for a 4-byte unsigned integer, 4-byte aligned. */
BinField *
bin_field_new_uint32 (void)
{
return new_field_uint (4);
}
/* Create a field description for an 8-byte unsigned integer, 8-byte aligned. */
BinField *
bin_field_new_uint64 (void)
{
return new_field_uint (8);
}
/* Return the NUL-terminated string that starts at the current frame's
 * offset.  The frame's format is not used, so it may be NULL.
 *
 * The returned pointer refers directly into the parser's buffer and must
 * not be freed.  Returns NULL if no frame is active, if the offset lies
 * outside the buffer, or if no terminating NUL is found before the end of
 * the buffer; callers must check for NULL before using the result.
 */
const gchar *
bin_parser_get_string (BinParser *parser)
{
    const guchar *start;
    gsize offset;

    g_return_val_if_fail (parser->frame != NULL, NULL);

    offset = parser->frame->offset;
    if (offset >= parser->length)
        return NULL;

    start = parser->data + offset;

    /* Make sure the string ends inside the buffer */
    if (memchr (start, '\0', parser->length - offset) == NULL)
        return NULL;

    return (const gchar *)start;
}
/* Push a new frame: subsequent reads decode a record of @format located
 * @offset bytes into the parser's buffer, with the record index reset to 0.
 * Every call must be balanced by a bin_parser_end().
 */
void
bin_parser_begin (BinParser *parser,
                  BinFormat *format,
                  gsize      offset)
{
    ParserFrame *top = g_new0 (ParserFrame, 1);

    top->next = parser->frame;
    top->index = 0;
    top->offset = offset;
    top->format = format;

    parser->frame = top;
}
/* Pop and free the frame pushed by the matching bin_parser_begin(),
 * making the previous frame (if any) current again.
 *
 * Calling this without an active frame is a programming error; it is
 * reported through g_return_if_fail() instead of dereferencing NULL.
 */
void
bin_parser_end (BinParser *parser)
{
    ParserFrame *frame;

    g_return_if_fail (parser->frame != NULL);

    frame = parser->frame;
    parser->frame = frame->next;

    g_free (frame);
}
/* Treat the current frame as an array of records and select element
 * @index.  The frame offset is moved back to element 0 and then forward by
 * @index records, using bin_format_get_size() as the record size.
 */
void
bin_parser_index (BinParser *parser,
                  int        index)
{
    ParserFrame *top = parser->frame;
    gsize record_size = bin_format_get_size (top->format);
    gsize base = top->offset - top->index * record_size;

    top->offset = base + index * record_size;
    top->index = index;
}
/* Create a field description for an inline array of @n_elements items of
 * @element_size bytes each.  The field is as wide as the whole array and
 * is aligned to the size of one element.
 */
BinField *
bin_field_new_fixed_array (int n_elements,
                           int element_size)
{
    BinField *result = g_new0 (BinField, 1);

    result->align = element_size;
    result->width = n_elements * element_size;

    return result;
}
#if 0
#include <elf.h>
/* Disabled (inside "#if 0"): earlier copy of the ELF identification check.
 *
 * Inspects the e_ident bytes at @data and stores whether the file is
 * 64 bit in @is_64 and whether it is big endian in @is_be.  Returns FALSE
 * if @length is smaller than EI_NIDENT or if the class or data encoding
 * byte has an unexpected value, TRUE otherwise.  Prints the detected type.
 */
static gboolean
find_elf_type (const guchar *data, gsize length,
gboolean *is_64, gboolean *is_be)
{
/* FIXME: this function should be able to return an error */
if (length < EI_NIDENT)
return FALSE;
/* 32 or 64? */
switch (data[EI_CLASS])
{
case ELFCLASS32:
*is_64 = FALSE;
break;
case ELFCLASS64:
*is_64 = TRUE;
break;
default:
/* return ERROR */
return FALSE;
break;
}
/* big or little endian? */
switch (data[EI_DATA])
{
case ELFDATA2LSB:
*is_be = FALSE;
break;
case ELFDATA2MSB:
*is_be = TRUE;
break;
default:
/* return Error */
return FALSE;
break;
}
g_print ("This elf file is %s %s\n",
*is_64? "64 bit" : "32 bit",
*is_be? "big endiann" : "little endian");
return TRUE;
}
/* Disabled (inside "#if 0"): unfinished experiment that describes the ELF
 * header, section header and symbol layouts with bin_format_new() and
 * prints the section header offset and section count.
 *
 * NOTE(review): this would not compile if enabled.  It calls make_word()
 * and make_uint*() helpers that are not defined in this file, passes three
 * arguments to bin_parser_new(), omits the terminating NULL in the 32-bit
 * symbol format, and contains an incomplete statement in a nested "#if 0".
 */
void
parse_elf (const guchar *data,
gsize length)
{
gboolean is_64, is_big_endian;
BinFormat *elf_header;
BinFormat *shn_entry;
const guchar *section_header;
BinParser *parser;
BinParser *sh_parser;
BinFormat *sym;
int i;
find_elf_type (data, length, &is_64, &is_big_endian);
elf_header = bin_format_new (
is_big_endian,
"e_ident", bin_field_new_fixed_array (EI_NIDENT, 1),
"e_type", bin_field_new_uint16 (),
"e_machine", bin_field_new_uint16 (),
"e_version", bin_field_new_uint32 (),
"e_entry", make_word (is_64),
"e_phoff", make_word (is_64),
"e_shoff", make_word (is_64),
"e_flags", make_uint32 (),
"e_ehsize", make_uint16(),
"e_phentsize", make_uint16 (),
"e_phnum", make_uint16 (),
"e_shentsize", make_uint16 (),
"e_shnum", make_uint16 (),
"e_shstrndx", make_uint16 (),
NULL);
shn_entry = bin_format_new (
is_big_endian,
"sh_name", make_uint32(),
"sh_type", make_uint32(),
"sh_flags", make_word (is_64),
"sh_addr", make_word (is_64),
"sh_offset", make_word (is_64),
"sh_size", make_word (is_64),
"sh_link", make_uint32(),
"sh_info", make_uint32(),
"sh_addralign", make_word (is_64),
"sh_entsize", make_word (is_64),
NULL);
if (is_64)
{
sym = bin_format_new (
is_big_endian,
"st_name", make_uint32(),
"st_info", make_uint8 (),
"st_other", make_uint8 (),
"st_shndx", make_uint16 (),
"st_value", make_uint64 (),
"st_size", make_uint64 (),
NULL);
}
else
{
sym = bin_format_new (
is_big_endian,
"st_name", make_uint32 (),
"st_value", make_uint32 (),
"st_size", make_uint32 (),
"st_info", make_uint8 (),
"st_other", make_uint8 (),
"st_shndx", make_uint16 ());
}
parser = bin_parser_new (elf_header, data, length);
section_header = data + bin_parser_get_uint (parser, "e_shoff");
g_print ("section header offset: %u\n",
section_header - data);
g_print ("There are %llu sections\n",
bin_parser_get_uint (parser, "e_shnum"));
/* should think through how to deal with offsets, and whether parsers
* are always considered parsers of an array. If yes, then it
* may be reasonable to just pass the length of the array.
*
* Hmm, although the parser still needs to know the end of the data.
* Maybe create yet another structure, a subparser, that also contains
* an offset in addition to the beginning and length.
*
* Ie., bin_sub_parser_new (parser, section_header, shn_entry, n_headers);
*
* In that case, it might be interesting to merge format and parser,
* and just call it 'file' or something, then call the subparser "parser"
*
* Also, how do we deal with strings?
*
* "asdf", make_string()?
*
*/
sh_parser = bin_parser_new (shn_entry, section_header, (guint)-1);
for (i = 0; i < bin_parser_get_uint (parser, "e_shnum"); ++i)
{
#if 0
bin_parser_set_index (sh_parser, i);
#endif
#if 0
bin_parser_get_uint
parser, data + i * bin_format_length (shn_entry));
section_header =
data + bin_parser_get_uint (parser, "e_shoff");
parser = bin_parser_new (
#endif
}
#if 0
bin_format_array_get_string (shn_table, data, "sh_name");
bin_format_array_get_uint (shn_table, data, "sh_addr");
#endif
}
/* Disabled (inside "#if 0"): print @str followed by a newline on stderr
 * and terminate the process with exit status -1.
 */
static void
disaster (const char *str)
{
g_printerr ("%s\n", str);
exit (-1);
}
/* Disabled (inside "#if 0"): ad-hoc test driver that maps a hard-coded
 * shared library path and hands its contents to parse_elf().  Exits through
 * disaster() if the file cannot be mapped.
 */
int
main ()
{
GMappedFile *libgtk;
libgtk = g_mapped_file_new ("/usr/lib/libgtk-x11-2.0.so", FALSE, NULL);
if (!libgtk)
disaster ("Could not map the file\n");
parse_elf ((const guchar *)g_mapped_file_get_contents (libgtk),
g_mapped_file_get_length (libgtk));
return 0 ;
}
#endif

25
binparser.h Normal file
View File

@ -0,0 +1,25 @@
#include <glib.h>
/* Opaque types: a field description, a record layout made of fields, and
 * a parser that decodes records from a byte buffer.
 */
typedef struct BinField BinField;
typedef struct BinFormat BinFormat;
typedef struct BinParser BinParser;
/* Create a parser over length bytes at data; the buffer is not copied. */
BinParser *bin_parser_new (const guchar *data,
gsize length);
/* Create a record layout from a NULL-terminated list of (name, field)
 * pairs.  The BinField arguments are freed by this function.
 */
BinFormat *bin_format_new (gboolean big_endian,
const char *name, BinField *field,
...);
/* Size in bytes of one record of the format, including trailing padding. */
gsize bin_format_get_size (BinFormat *format);
/* Select record number index of the array the current frame points at. */
void bin_parser_index (BinParser *parser, int index);
/* Push a frame that decodes a record of format at the given byte offset;
 * must be balanced by bin_parser_end().
 */
void bin_parser_begin (BinParser *parser,
BinFormat *format,
gsize offset);
/* Pop the frame pushed by the matching bin_parser_begin(). */
void bin_parser_end (BinParser *parser);
/* Return a pointer to the string located at the current frame's offset. */
const char *bin_parser_get_string (BinParser *parser);
/* Read the unsigned integer field called name from the current record. */
guint64 bin_parser_get_uint (BinParser *parser,
const gchar *name);
/* Constructors for unsigned integer fields of 1, 2, 4 and 8 bytes. */
BinField *bin_field_new_uint8 (void);
BinField *bin_field_new_uint16 (void);
BinField *bin_field_new_uint32 (void);
BinField *bin_field_new_uint64 (void);
/* Constructor for an inline array of n_elements items of element_size bytes. */
BinField *bin_field_new_fixed_array (int n_elements,
int element_size);

458
elfparser.c Normal file
View File

@ -0,0 +1,458 @@
#include <string.h>
#include <elf.h>
#include "binparser.h"
#include "elfparser.h"
/* Placeholder for a parsed symbol table; it has no members yet and is not
 * used by the code in this file.
 * NOTE(review): a struct without members is a GNU C extension, not ISO C.
 */
typedef struct SymbolTable SymbolTable;
struct SymbolTable
{
};
/* Placeholder for the symbol type returned by elf_parser_lookup_symbol();
 * it has no members yet.
 * NOTE(review): a struct without members is a GNU C extension, not ISO C.
 */
struct ElfSym
{
};
/* State for parsing one ELF image: the underlying binary parser, the
 * record layouts built by make_formats(), and offsets found while scanning
 * the section headers in elf_parser_new().
 */
struct ElfParser
{
/* Parser over the raw bytes of the ELF image */
BinParser *parser;
/* Layout of the ELF file header */
BinFormat *header;
/* Not assigned anywhere in this file */
BinFormat *strtab_format;
/* Layout of one section header table entry */
BinFormat *shn_entry;
/* Layout of one symbol table entry */
BinFormat *sym_format;
/* File offset of the section named by e_shstrndx, i.e. the table that
 * holds section names */
gsize strtab_offset;
/* File offset of the ".strtab" section; 0 if none was found */
gsize str_table;
};
static void make_formats (ElfParser *parser,
gboolean is_64,
gboolean is_big_endian);
#if 0
BinFormat *str_tab = bin_field_new_string ("string", make_string());
gsize offset = find_it();
#endif
#if 0
/* Disabled (inside "#if 0"): early sketch that detects the ELF type and
 * prints the section header offset and section count.
 * NOTE(review): would not compile if enabled -- elf_header is never
 * initialised and the first bin_parser_get_uint() call lacks the parser
 * argument.
 */
static void
parse_elf (const guchar *data,
gsize length)
{
gboolean is_64, is_big_endian;
BinFormat *elf_header;
BinFormat *shn_entry;
BinParser *parser;
BinParser *sh_parser;
BinFormat *sym;
int i;
find_elf_type (data, length, &is_64, &is_big_endian);
parser = bin_parser_new (data, length);
bin_parser_begin (parser, elf_header, 0);
g_print ("section header offset: %u\n",
bin_parser_get_uint ("e_shoff"));
g_print ("There are %llu sections\n",
bin_parser_get_uint (parser, "e_shnum"));
}
#endif
/* Return the string stored @offset bytes into the section name string
 * table (the table located at parser->strtab_offset).
 *
 * Despite its general-sounding name, this only looks strings up in the
 * section header string table.  The result points into the parsed buffer.
 */
static const char *
elf_lookup_string (ElfParser *parser, int offset)
{
    BinParser *bin = parser->parser;
    const char *str;

    /* No format is needed to read a plain string, hence the NULL */
    bin_parser_begin (bin, NULL, parser->strtab_offset + offset);
    str = bin_parser_get_string (bin);
    bin_parser_end (bin);

    return str;
}
/* Determine the word size and byte order of the ELF image at @data.
 *
 * @data, @length: the raw file contents.
 * @is_64: set to TRUE for ELFCLASS64, FALSE for ELFCLASS32.
 * @is_be: set to TRUE for big endian, FALSE for little endian.
 *
 * Returns TRUE on success.  Returns FALSE, leaving @is_64 and @is_be
 * untouched, if the buffer is too short to hold e_ident, does not start
 * with the ELF magic number, or has an unknown class or data encoding.
 */
static gboolean
find_elf_type (const guchar *data, gsize length,
               gboolean *is_64, gboolean *is_be)
{
    gboolean word64;
    gboolean big_endian;

    /* FIXME: this function should be able to return an error */

    if (length < EI_NIDENT)
        return FALSE;

    /* Reject anything that is not an ELF file before trusting e_ident */
    if (memcmp (data, ELFMAG, SELFMAG) != 0)
        return FALSE;

    /* 32 or 64? */
    switch (data[EI_CLASS])
    {
    case ELFCLASS32:
        word64 = FALSE;
        break;

    case ELFCLASS64:
        word64 = TRUE;
        break;

    default:
        /* FIXME: set_error */
        return FALSE;
    }

    /* big or little endian? */
    switch (data[EI_DATA])
    {
    case ELFDATA2LSB:
        big_endian = FALSE;
        break;

    case ELFDATA2MSB:
        big_endian = TRUE;
        break;

    default:
        /* FIXME: set error */
        return FALSE;
    }

    *is_64 = word64;
    *is_be = big_endian;

    g_print ("This elf file is %s %s\n",
             *is_64? "64 bit" : "32 bit",
             *is_be? "big endiann" : "little endian");

    return TRUE;
}
/* Create a field for a native ELF word: 64 bits wide when @is_64 is set,
 * 32 bits wide otherwise.
 */
static BinField *
make_word (gboolean is_64)
{
    return is_64 ? bin_field_new_uint64 () : bin_field_new_uint32 ();
}
/* Print the size and name of every symbol in a symbol table section.
 *
 * @offset: file offset of the symbol table section.
 * @size:   size of the section in bytes; together with the size of one
 *          symbol entry this determines how many symbols are printed.
 *
 * Names are resolved against the ".strtab" section recorded in
 * parser->str_table.  Nothing is dumped if that section was not found.
 */
static void
dump_symbol_table (ElfParser *parser,
                   gsize      offset,
                   gsize      size)
{
    gsize entry_size;
    gsize n_symbols;
    int i;

    if (!parser->str_table)
    {
        g_print ("no string table\n");
        return;
    }

    entry_size = bin_format_get_size (parser->sym_format);
    if (entry_size == 0)
        return;

    /* bin_parser_index() takes an int, so cap the count accordingly */
    n_symbols = size / entry_size;
    if (n_symbols > (gsize)G_MAXINT)
        n_symbols = G_MAXINT;

    g_print ("dumping symbol table at %" G_GSIZE_FORMAT "\n", offset);

    bin_parser_begin (parser->parser, parser->sym_format, offset);

    for (i = 0; (gsize)i < n_symbols; ++i)
    {
        guint64 name_idx;
        guint64 sym_size;
        const char *sym_name;

        bin_parser_index (parser->parser, i);

        name_idx = bin_parser_get_uint (parser->parser, "st_name");
        sym_size = bin_parser_get_uint (parser->parser, "st_size");

        /* Symbol names live in .strtab, at st_name bytes into it */
        bin_parser_begin (parser->parser,
                          NULL, parser->str_table + name_idx);
        sym_name = bin_parser_get_string (parser->parser);
        bin_parser_end (parser->parser);

        g_print ("%d symbol: size: %" G_GUINT64_FORMAT ", %s.\n",
                 i, sym_size, sym_name ? sym_name : "<invalid>");
    }

    bin_parser_end (parser->parser);
}
ElfParser *
elf_parser_new (const guchar *data, gsize length)
{
ElfParser *parser;
gboolean is_64, is_big_endian;
int n_sections;
int section_name_table;
int i;
if (!find_elf_type (data, length, &is_64, &is_big_endian))
{
/* FIXME: set error */
return NULL;
}
parser = g_new0 (ElfParser, 1);
parser->parser = bin_parser_new (data, length);
make_formats (parser, is_64, is_big_endian);
bin_parser_begin (parser->parser, parser->header, 0);
n_sections =
bin_parser_get_uint (parser->parser, "e_shnum");
section_name_table =
bin_parser_get_uint (parser->parser, "e_shstrndx");
bin_parser_begin (
parser->parser, parser->shn_entry,
bin_parser_get_uint (parser->parser, "e_shoff"));
bin_parser_index (parser->parser, section_name_table);
parser->strtab_offset =
bin_parser_get_uint (parser->parser, "sh_offset");
for (i = 0; i < n_sections; ++i)
{
const char *name;
int offset;
bin_parser_index (parser->parser, i);
offset = bin_parser_get_uint (parser->parser, "sh_name");
name = elf_lookup_string (parser, offset);
if (strcmp (name, ".strtab") == 0)
{
parser->str_table = bin_parser_get_uint (
parser->parser, "sh_offset");
}
}
for (i = 0; i < n_sections; ++i)
{
const char *name;
int offset;
const char *type;
bin_parser_index (parser->parser, i);
offset = bin_parser_get_uint (parser->parser, "sh_name");
name = elf_lookup_string (parser, offset);
switch (bin_parser_get_uint (parser->parser, "sh_type"))
{
case SHT_NULL:
type = "undefined";
break;
case SHT_PROGBITS:
type = "progbits";
break;
case SHT_SYMTAB:
type = "symbol table";
dump_symbol_table (
parser,
bin_parser_get_uint (parser->parser, "sh_offset"),
bin_parser_get_uint (parser->parser, "sh_size"));
break;
case SHT_STRTAB:
type = "string table";
break;
case SHT_RELA:
type = "relocations with explicit addends";
break;
case SHT_HASH:
type = "symbol hash table";
break;
case SHT_DYNAMIC:
type = "Information for dynamic linking";
break;
case SHT_NOTE:
type = "note";
break;
case SHT_NOBITS:
type = "nobits";
break;
case SHT_REL:
type = "relocations without explicit addends";
break;
case SHT_SHLIB:
type = "reserved with unspecified semantics";
break;
case SHT_DYNSYM:
type = "dynamic symbols";
break;
case SHT_LOPROC:
type = "loproc";
break;
case SHT_HIPROC:
type = "hiproc";
break;
case SHT_LOUSER:
type = "louser:";
break;
case SHT_HIUSER:
type = "hiuser";
break;
default:
type = "<unknown>";
break;
}
g_print ("%s [%s] (%d)\n", name, type, offset);
}
bin_parser_end (parser->parser);
bin_parser_end (parser->parser);
return parser;
}
/* Return the string stored @offset bytes into the string table that
 * starts at file offset @table.  The result points into the buffer owned
 * by @parser.
 */
static const char *
get_string (BinParser *parser,
            gsize      table,
            gsize      offset)
{
    const char *str;

    /* A temporary frame without a format is enough to read a string */
    bin_parser_begin (parser, NULL, table + offset);
    str = bin_parser_get_string (parser);
    bin_parser_end (parser);

    return str;
}
/* Find the section called @name and return its file offset (sh_offset).
 *
 * Walks the section header table, resolving each section's name through
 * the section name string table.  Prints the outcome.
 *
 * Returns the offset of the first section whose name matches, or -1 if
 * there is no such section.  The parser's frame stack is left exactly as
 * it was on entry.
 */
static gssize
find_section (ElfParser  *parser,
              const char *name)
{
    BinParser *bparser = parser->parser;
    int n_sections;
    int section_name_table;
    gsize section_headers_offset;
    gsize section_name_table_offset;
    gssize result = -1;
    int i;

    /* Frame 1: the ELF header */
    bin_parser_begin (bparser, parser->header, 0);

    n_sections = bin_parser_get_uint (bparser, "e_shnum");
    section_name_table = bin_parser_get_uint (bparser, "e_shstrndx");
    section_headers_offset = bin_parser_get_uint (bparser, "e_shoff");

    /* Frame 2: the section header table */
    bin_parser_begin (bparser, parser->shn_entry, section_headers_offset);

    bin_parser_index (bparser, section_name_table);
    section_name_table_offset = bin_parser_get_uint (bparser, "sh_offset");

    for (i = 0; i < n_sections; ++i)
    {
        const char *section_name;
        gsize name_offset;

        bin_parser_index (bparser, i);
        name_offset = bin_parser_get_uint (bparser, "sh_name");

        section_name = get_string (
            bparser, section_name_table_offset, name_offset);

        /* Skip sections whose name could not be resolved */
        if (section_name != NULL && strcmp (section_name, name) == 0)
        {
            result = bin_parser_get_uint (bparser, "sh_offset");
            break;
        }
    }

    /* Pop both frames pushed above so the stack stays balanced */
    bin_parser_end (bparser);
    bin_parser_end (bparser);

    g_print ("found %s at %" G_GSSIZE_FORMAT "\n", name, result);

    return result;
}
/* Presumably intended to return the symbol that covers @address; not
 * implemented yet.
 *
 * Currently a stub: it looks up the offsets of the .symtab, .strtab,
 * .dynsym and .dynstr sections with find_section() (which prints each
 * result), does not use those offsets or @address, and always returns NULL.
 */
const ElfSym *
elf_parser_lookup_symbol (ElfParser *parser,
gulong address)
{
gssize symtab_offset = find_section (parser, ".symtab");
gssize strtab_offset = find_section (parser, ".strtab");
gssize dynsym_offset = find_section (parser, ".dynsym");
gssize dynstr_offset = find_section (parser, ".dynstr");
return NULL;
}
/* Build the record layouts used to decode an ELF image and store them in
 * @parser: the file header, a section header entry and a symbol table
 * entry.
 *
 * @is_64 selects 64-bit or 32-bit words for address-sized fields and the
 * matching symbol entry layout (the two classes order the symbol fields
 * differently).  @is_big_endian is passed to every format.
 */
static void
make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian)
{
    BinFormat *symbol;

    /* ELF file header */
    parser->header = bin_format_new (
        is_big_endian,
        "e_ident",      bin_field_new_fixed_array (EI_NIDENT, 1),
        "e_type",       bin_field_new_uint16 (),
        "e_machine",    bin_field_new_uint16 (),
        "e_version",    bin_field_new_uint32 (),
        "e_entry",      make_word (is_64),
        "e_phoff",      make_word (is_64),
        "e_shoff",      make_word (is_64),
        "e_flags",      bin_field_new_uint32 (),
        "e_ehsize",     bin_field_new_uint16 (),
        "e_phentsize",  bin_field_new_uint16 (),
        "e_phnum",      bin_field_new_uint16 (),
        "e_shentsize",  bin_field_new_uint16 (),
        "e_shnum",      bin_field_new_uint16 (),
        "e_shstrndx",   bin_field_new_uint16 (),
        NULL);

    /* Section header table entry */
    parser->shn_entry = bin_format_new (
        is_big_endian,
        "sh_name",      bin_field_new_uint32 (),
        "sh_type",      bin_field_new_uint32 (),
        "sh_flags",     make_word (is_64),
        "sh_addr",      make_word (is_64),
        "sh_offset",    make_word (is_64),
        "sh_size",      make_word (is_64),
        "sh_link",      bin_field_new_uint32 (),
        "sh_info",      bin_field_new_uint32 (),
        "sh_addralign", make_word (is_64),
        "sh_entsize",   make_word (is_64),
        NULL);

    /* Symbol table entry */
    if (!is_64)
    {
        symbol = bin_format_new (
            is_big_endian,
            "st_name",  bin_field_new_uint32 (),
            "st_value", bin_field_new_uint32 (),
            "st_size",  bin_field_new_uint32 (),
            "st_info",  bin_field_new_uint8 (),
            "st_other", bin_field_new_uint8 (),
            "st_shndx", bin_field_new_uint16 (),
            NULL);
    }
    else
    {
        symbol = bin_format_new (
            is_big_endian,
            "st_name",  bin_field_new_uint32 (),
            "st_info",  bin_field_new_uint8 (),
            "st_other", bin_field_new_uint8 (),
            "st_shndx", bin_field_new_uint16 (),
            "st_value", bin_field_new_uint64 (),
            "st_size",  bin_field_new_uint64 (),
            NULL);
    }

    parser->sym_format = symbol;
}

8
elfparser.h Normal file
View File

@ -0,0 +1,8 @@
/* Public interface of the ELF parser.
 * NOTE(review): these declarations use guchar, gsize and gulong, but no
 * #include is visible in this header -- presumably includers must pull in
 * <glib.h> first; confirm.
 */
/* Opaque symbol and parser types */
typedef struct ElfSym ElfSym;
typedef struct ElfParser ElfParser;
/* Create a parser for the ELF image of length bytes at data; returns NULL
 * if the data is not recognised as ELF.
 */
ElfParser *elf_parser_new (const guchar *data,
gsize length);
/* Look up a symbol by address.  The implementation in elfparser.c is
 * currently a stub that always returns NULL.
 */
const ElfSym *elf_parser_lookup_symbol (ElfParser *parser,
gulong address);