Switch to a simpler conceptual model. Update to binparser API changes.

2007-02-24  Soren Sandman <sandmann@daimi.au.dk>

        * binparser.[ch]: Switch to a simpler conceptual model.
        * elfparser.c: Update to binparser API changes.
        * TODO: updates



svn path=/trunk/; revision=352
This commit is contained in:
Soren Sandman
2007-02-24 07:15:24 +00:00
committed by Søren Sandmann Pedersen
parent ea18b8e991
commit 494e40a912
5 changed files with 657 additions and 555 deletions

View File

@ -1,3 +1,9 @@
2007-02-24 Soren Sandman <sandmann@daimi.au.dk>
* binparser.[ch]: Switch to a simpler conceptual model.
* elfparser.c: Update to binparser API changes.
* TODO: updates
Fri Feb 9 16:53:29 2007 Søren Sandmann <sandmann@redhat.com>
* Update copyright notices

81
TODO
View File

@ -103,40 +103,21 @@ Before 1.2:
* crc32 checking probably doesn't belong in elfparser.c
* Rethink binparser. Maybe the default mode should be:
- there is a current offset
- you can move the cursor
- _goto()
- _align()
- you can read structs with "begin_struct (format) / end_struct()"
Or maybe just "set_format()" that would accept NULL?
- when you are reading a struct, you can skip records with _index()
- you can read fields with get_string/get_uint by passing a name.
- you can read anonymous strings and uints by passing NULL for name
This is allowed even when not reading structs. Or maybe this
should be separate functions. Advantages:
- they can skip ahead, unlike fields accessors
- you can access specific types (8,16,32,64)
- there is no "name" field
Disadvantage:
- the field accessors would need renaming.
bin_parser_get_uint_field ()
is not really that bad though.
Maybe begin_record() could return a structure you could
use to access that particular record? Would nicely solve
the problems with "goto" and "index".
bin_record_get_uint();
What should begin/end be called? They will have different
objects passed.
bin_parser_get_record (parser) -> record
bin_record_free (record);
- Maybe support for indirect strings? Ie., get_string() in elfparser
- This will require endianness to be a per-parser property. Which is
probably just fine. Although d-bus actually has
per-message endianness. Maybe there could be a settable
"endianness" property.
* Missing things in binparser.[ch]
Also need to add error checking.
- it's inconvenient that you have to pass in both a parser _and_
a record. The record should just contain a pointer to the parser
- the bin_parser_seek_record (..., 1); idiom is a little dubious
- maybe convert BIN_UINT32 => { BIN_UINT, 4 }
we already have the width in the struct.
- Add error checking
Also need to add error checking.
- "native endian" is probably not useful. Maybe go back to just
having big/little endian.
* Rename stack_stash_foreach_by_address() to stack_stash_foreach_unique(),
or maybe not ...
@ -694,6 +675,40 @@ Later:
-=-=-=-=-=-=-=-=-=-=-=-=-=-=- ALREADY DONE -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
* Rethink binparser. Maybe the default mode should be:
- there is a current offset
- you can move the cursor
- _goto()
- _align()
- you can read structs with "begin_struct (format) / end_struct()"
Or maybe just "set_format()" that would accept NULL?
- when you are reading a struct, you can skip records with _index()
- you can read fields with get_string/get_uint by passing a name.
- you can read anonymous strings and uints by passing NULL for name
This is allowed even when not reading structs. Or maybe this
should be separate functions. Advantages:
- they can skip ahead, unlike fields accessors
- you can access specific types (8,16,32,64)
- there is no "name" field
Disadvantage:
- the field accessors would need renaming.
bin_parser_get_uint_field ()
is not really that bad though.
Maybe begin_record() could return a structure you could
use to access that particular record? Would nicely solve
the problems with "goto" and "index".
bin_record_get_uint();
What should begin/end be called? They will have different
objects passed.
bin_parser_get_record (parser) -> record
bin_record_free (record);
- Maybe support for indirect strings? Ie., get_string() in elfparser
- This will require endianness to be a per-parser property. Which is
probably just fine. Although d-bus actually has
per-message endianness. Maybe there could be a settable
"endianness" property.
* Don't look in $(libdir) for separate debug files (since $libdir is
the libdir for sysprof, not a system wide libdir). Tim Rowley.
Fix is probably to hardcode /usr/lib, and also look in $libdir.

View File

@ -16,57 +16,50 @@
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <string.h>
#include <stdlib.h>
#include <glib.h>
#include <stdarg.h>
#include "binparser.h"
typedef struct ParserFrame ParserFrame;
#include "binparser2.h"
struct BinRecord
{
BinFormat * format;
int index;
gsize offset;
BinParser * parser;
};
struct BinField
{
guint64 offset;
int width;
int align;
char * name;
};
struct BinFormat
{
gboolean big_endian;
int n_fields;
BinField * fields;
};
typedef struct Field Field;
struct BinParser
{
gsize offset;
const guchar * data;
gsize length;
gboolean cache_in_use;
BinRecord cache;
gsize offset;
const char * error_msg;
GList * records;
BinEndian endian;
gsize saved_offset;
};
/* Resolved layout of a single field inside a BinRecord.  Instances are
 * filled in by bin_parser_create_record() from the caller's BinField
 * descriptions.
 */
struct Field
{
/* field name; looked up with strcmp() by the field accessors */
char name[BIN_MAX_NAME];
guint offset; /* from beginning of struct */
/* size of the field in bytes, as computed by get_field_width() */
guint width;
/* BinType value copied from the corresponding BinField */
BinType type;
};
/* A record layout: a counted sequence of resolved fields.
 *
 * fields[] is declared with one element, but bin_parser_create_record()
 * allocates extra room behind the struct so that it actually holds
 * n_fields entries.
 */
struct BinRecord
{
/* number of valid entries in fields[] */
int n_fields;
Field fields[1];
};
BinParser *
bin_parser_new (const guchar *data,
gsize length)
bin_parser_new (const guchar *data,
gsize length)
{
BinParser *parser = g_new0 (BinParser, 1);
parser->offset = 0;
parser->data = data;
parser->length = length;
parser->cache_in_use = FALSE;
parser->offset = 0;
parser->error_msg = NULL;
parser->records = NULL;
parser->endian = BIN_NATIVE_ENDIAN;
return parser;
}
@ -74,31 +67,28 @@ bin_parser_new (const guchar *data,
/* Free @parser together with every record that was registered on it
 * through bin_parser_create_record().
 *
 * Only the parser's own bookkeeping is released; the data buffer that
 * was handed to bin_parser_new() is not freed here.
 *
 * Passing NULL is allowed and does nothing.
 */
void
bin_parser_free (BinParser *parser)
{
    GList *list;

    if (parser == NULL)
        return;

    for (list = parser->records; list != NULL; list = list->next)
    {
        BinRecord *record = list->data;

        g_free (record);
    }

    /* The list nodes themselves were allocated by g_list_prepend() and
     * are separate from the records they point to, so they have to be
     * released as well or they leak.
     */
    g_list_free (parser->records);

    g_free (parser);
}
static GQueue *
read_varargs (va_list args,
const char * name,
BinField * field)
const guchar *
bin_parser_get_data (BinParser *parser)
{
GQueue *queue = g_queue_new ();
gpointer p;
if (name)
{
g_queue_push_tail (queue, (gpointer)name);
g_queue_push_tail (queue, field);
p = va_arg (args, gpointer);
while (p)
{
g_queue_push_tail (queue, p);
p = va_arg (args, gpointer);
}
}
return queue;
return parser->data;
}
/* Return the length that was passed to bin_parser_new() for @parser. */
gsize
bin_parser_get_length (BinParser *parser)
{
    gsize n_bytes = parser->length;

    return n_bytes;
}
static guint64
@ -117,84 +107,168 @@ align (guint64 offset, int alignment)
return offset;
}
gsize
bin_format_get_size (BinFormat *format)
static int
get_field_width (const BinField *field)
{
BinField *last_field = &(format->fields[format->n_fields - 1]);
BinField *first_field = &(format->fields[0]);
switch (field->type)
{
case BIN_UINT8:
return 1;
case BIN_UINT16:
return 2;
case BIN_UINT32:
return 4;
case BIN_UINT64:
return 8;
case BIN_UNINTERPRETED:
return field->n_bytes;
}
g_assert_not_reached ();
return -1;
}
/* Alignment in bytes used when laying out @field: uninterpreted byte
 * runs may start at any offset, every other type is aligned to its own
 * width.
 */
static int
get_align (const BinField *field)
{
    return (field->type == BIN_UNINTERPRETED) ? 1 : get_field_width (field);
}
/* Build a record layout from @fields and register it with @parser.
 *
 * @fields is an array of field descriptions terminated by an entry whose
 * name is the empty string.  Fields are laid out in order; each one is
 * placed at the next offset that satisfies get_align() for its type.
 *
 * The returned record is prepended to parser->records and is released by
 * bin_parser_free(); callers must not free it themselves.
 */
BinRecord *
bin_parser_create_record (BinParser      *parser,
                          const BinField *fields)
{
    BinRecord *record;
    int i, n_fields;
    guint offset;
    gsize n_extra;

    /* Count entries up to (not including) the empty-name terminator */
    n_fields = 0;
    while (fields[n_fields].name[0] != '\0')
        n_fields++;

    /* BinRecord already has room for one Field.  An empty description
     * must not be allowed to compute (0 - 1) * sizeof (Field), which
     * would wrap around to an enormous allocation size.
     */
    n_extra = (n_fields > 1) ? (gsize)(n_fields - 1) : 0;

    record = g_malloc0 (sizeof (BinRecord) + n_extra * sizeof (Field));

    offset = 0;
    record->n_fields = n_fields;
    for (i = 0; i < n_fields; ++i)
    {
        const BinField *bin_field = &(fields[i]);
        Field *field = &(record->fields[i]);

        offset = align (offset, get_align (bin_field));

        /* The destination was zeroed by g_malloc0() and at most
         * BIN_MAX_NAME - 1 bytes are copied, so the name is always
         * NUL terminated.
         */
        strncpy (field->name, bin_field->name, BIN_MAX_NAME - 1);
        field->offset = offset;
        field->type = bin_field->type;
        field->width = get_field_width (bin_field);

        offset += field->width;
    }

    parser->records = g_list_prepend (parser->records, record);

    return record;
}
/* TRUE when an error message is currently set on @parser. */
gboolean
bin_parser_error (BinParser *parser)
{
    if (parser->error_msg == NULL)
        return FALSE;

    return TRUE;
}
/* Drop any error message stored on @parser, so that bin_parser_error()
 * reports FALSE again.
 */
void
bin_parser_clear_error (BinParser *parser)
{
    const char *no_error = NULL;

    parser->error_msg = no_error;
}
/* Return the error message stored on @parser, or NULL when none is set.
 * The string is not copied.
 */
const gchar *
bin_parser_get_error_msg (BinParser *parser)
{
    const gchar *msg = parser->error_msg;

    return msg;
}
/* Store @endian as the byte order of @parser. */
void
bin_parser_set_endian (BinParser *parser,
                       BinEndian  endian)
{
    BinParser *self = parser;

    self->endian = endian;
}
/* Move current offset */
/* Return the current offset of @parser. */
gsize
bin_parser_get_offset (BinParser *parser)
{
    gsize current = parser->offset;

    return current;
}
/* Move the current offset of @parser to @offset.  No range check is
 * performed against the parser's length.
 */
void
bin_parser_set_offset (BinParser *parser,
                       gsize      offset)
{
    BinParser *self = parser;

    self->offset = offset;
}
/* Replace the current offset of @parser with the result of align() for
 * that offset and @byte_width.
 */
void
bin_parser_align (BinParser *parser,
                  gsize      byte_width)
{
    gsize aligned = align (parser->offset, byte_width);

    parser->offset = aligned;
}
/* Return the size in bytes of one instance of @record, including the
 * trailing padding needed for a following record of the same layout to
 * start correctly aligned.
 *
 * A record without fields has size 0.
 */
gsize
bin_record_get_size (BinRecord *record)
{
    Field *last_field;
    Field *first_field;
    int record_align;

    /* Nothing to measure; also avoids indexing fields[-1] below */
    if (record->n_fields == 0)
        return 0;

    last_field = &(record->fields[record->n_fields - 1]);
    first_field = &(record->fields[0]);

    /* align to first field, since that's the alignment of the record
     * following this one.
     *
     * This must be the field's alignment, not its width: an
     * uninterpreted byte run is laid out with alignment 1 by
     * get_align(), no matter how many bytes it spans.
     */
    if (first_field->type == BIN_UNINTERPRETED)
        record_align = 1;
    else
        record_align = first_field->width;

    return align (last_field->offset + last_field->width, record_align);
}
BinFormat *
bin_format_new (gboolean big_endian,
const char *name, BinField *field,
...)
void
bin_parser_seek_record (BinParser *parser,
BinRecord *record,
int n_records)
{
GQueue *queue = g_queue_new ();
BinFormat *format = g_new0 (BinFormat, 1);
GList *list;
int i;
guint64 offset;
va_list args;
format->big_endian = big_endian;
/* Build queue of child types */
va_start (args, field);
queue = read_varargs (args, name, field);
va_end (args);
g_assert (queue->length % 2 == 0);
format->n_fields = queue->length / 2;
format->fields = g_new (BinField, format->n_fields);
i = 0;
offset = 0;
for (list = queue->head; list != NULL; list = list->next->next)
{
const char *name = list->data;
BinField *field = list->next->data;
offset = align (offset, field->align);
format->fields[i].name = g_strdup (name);
format->fields[i].width = field->width;
format->fields[i].offset = offset;
offset += field->width;
++i;
g_free (field);
}
g_queue_free (queue);
return format;
gsize record_size = bin_record_get_size (record);
parser->offset += record_size * n_records;
}
static const BinField *
get_field (BinFormat *format,
const gchar *name)
void
bin_parser_save (BinParser *parser)
{
int i;
for (i = 0; i < format->n_fields; ++i)
{
BinField *field = &(format->fields[i]);
if (strcmp (field->name, name) == 0)
return field;
}
return NULL;
parser->saved_offset = parser->offset;
}
/* Set the current offset of @parser back to the value held in
 * parser->saved_offset.
 */
void
bin_parser_restore (BinParser *parser)
{
    gsize remembered = parser->saved_offset;

    parser->offset = remembered;
}
/* retrieve data */
static guint64
convert_uint (const guchar *data,
gboolean big_endian,
int width)
BinEndian endian,
BinType type)
{
guint8 r8;
guint16 r16;
@ -205,39 +279,41 @@ convert_uint (const guchar *data,
if (width == 4)
g_print ("converting at %p %d %d %d %d\n", data, data[0], data[1], data[2], data[3]);
#endif
/* FIXME: check that we are within the file */
switch (width)
switch (type)
{
case 1:
case BIN_UINT8:
r8 = *(guint8 *)data;
return r8;
case 2:
case BIN_UINT16:
r16 = *(guint16 *)data;
if (big_endian)
if (endian == BIN_BIG_ENDIAN)
r16 = GUINT16_FROM_BE (r16);
else
else if (endian == BIN_LITTLE_ENDIAN)
r16 = GUINT16_FROM_LE (r16);
return r16;
case 4:
case BIN_UINT32:
r32 = *(guint32 *)data;
if (big_endian)
if (endian == BIN_BIG_ENDIAN)
r32 = GUINT32_FROM_BE (r32);
else
else if (endian == BIN_LITTLE_ENDIAN)
r32 = GUINT32_FROM_LE (r32);
return r32;
case 8:
case BIN_UINT64:
r64 = *(guint64 *)data;
if (big_endian)
if (endian == BIN_BIG_ENDIAN)
r64 = GUINT64_FROM_BE (r64);
else
else if (endian == BIN_LITTLE_ENDIAN)
r64 = GUINT64_FROM_LE (r64);
return r64;
@ -248,61 +324,36 @@ convert_uint (const guchar *data,
}
}
guint32
bin_parser_get_uint32 (BinParser *parser)
static int
get_uint_width (BinType type)
{
guint32 result;
/* FIXME: This is broken for two reasons:
*
* (1) It assumes file_endian==machine_endian
*
* (2) It doesn't check for file overrun.
*
*/
result = *(guint32 *)(parser->data + parser->offset);
parser->offset += 4;
return result;
switch (type)
{
case BIN_UINT8:
return 1;
case BIN_UINT16:
return 2;
case BIN_UINT32:
return 4;
case BIN_UINT64:
return 8;
default:
return -1;
}
}
static BinField *
new_field_uint (int width)
guint64
bin_parser_get_uint (BinParser *parser,
BinType type)
{
BinField *field = g_new0 (BinField, 1);
field->width = width;
field->align = width;
return field;
guint64 r = convert_uint (parser->data + parser->offset, parser->endian, type);
parser->offset += get_uint_width (type);
return r;
}
BinField *
bin_field_new_uint8 (void)
{
return new_field_uint (1);
}
BinField *
bin_field_new_uint16 (void)
{
return new_field_uint (2);
}
BinField *
bin_field_new_uint32 (void)
{
return new_field_uint (4);
}
BinField *
bin_field_new_uint64 (void)
{
return new_field_uint (8);
}
const gchar *
const char *
bin_parser_get_string (BinParser *parser)
{
const char *result;
@ -314,116 +365,46 @@ bin_parser_get_string (BinParser *parser)
parser->offset += strlen (result) + 1;
return result;
}
void
bin_parser_align (BinParser *parser,
gsize byte_width)
{
parser->offset = align (parser->offset, byte_width);
}
void
bin_parser_goto (BinParser *parser,
gsize offset)
{
parser->offset = offset;
}
BinParser *
bin_record_get_parser (BinRecord *record)
{
return record->parser;
}
const gchar *
bin_record_get_string_indirect (BinRecord *record,
const char *name,
gsize str_table)
{
BinParser *parser = record->parser;
const char *result = NULL;
gsize index;
gsize saved_offset;
saved_offset = bin_parser_get_offset (record->parser);
index = bin_record_get_uint (record, name);
}
bin_parser_goto (record->parser, str_table + index);
static const Field *
get_field (BinRecord *format,
const gchar *name)
{
int i;
result = bin_parser_get_string (parser);
bin_parser_goto (record->parser, saved_offset);
return result;
}
gsize
bin_parser_get_offset (BinParser *parser)
{
g_return_val_if_fail (parser != NULL, 0);
return parser->offset;
}
const guchar *
bin_parser_get_data (BinParser *parser)
{
return parser->data;
}
gsize
bin_parser_get_length (BinParser *parser)
{
return parser->length;
}
/* Record */
BinRecord *
bin_parser_get_record (BinParser *parser,
BinFormat *format,
gsize offset)
{
BinRecord *record;
if (!parser->cache_in_use)
for (i = 0; i < format->n_fields; ++i)
{
parser->cache_in_use = TRUE;
record = &(parser->cache);
Field *field = &(format->fields[i]);
if (strcmp (field->name, name) == 0)
{
#if 0
g_print ("found field: %s (offset: %d, type %d)\n", field->name, field->offset, field->type);
#endif
return field;
}
}
else
{
record = g_new0 (BinRecord, 1);
}
record->parser = parser;
record->index = 0;
record->offset = offset;
record->format = format;
return record;
}
void
bin_record_free (BinRecord *record)
{
if (record == &(record->parser->cache))
record->parser->cache_in_use = FALSE;
else
g_free (record);
return NULL;
}
guint64
bin_record_get_uint (BinRecord *record,
const char *name)
bin_parser_get_uint_field (BinParser *parser,
BinRecord *record,
const char *name)
{
const guint8 *pos;
const BinField *field;
const Field *field = get_field (record, name);
const guchar *pos;
field = get_field (record->format, name);
pos = record->parser->data + record->offset + field->offset;
#if 0
g_print ("moving to %d (%d + %d)\n", parser->offset + field->offset, parser->offset, field->offset);
#endif
pos = parser->data + parser->offset + field->offset;
#if 0
g_print (" record offset: %d\n", record->offset);
@ -431,7 +412,7 @@ bin_record_get_uint (BinRecord *record,
g_print (" field offset %d\n", field->offset);
#endif
if (record->offset + field->offset + field->width > record->parser->length)
if (pos > parser->data + parser->length)
{
/* FIXME: generate error */
return 0;
@ -441,34 +422,5 @@ bin_record_get_uint (BinRecord *record,
g_print (" uint %d at %p => %d\n", field->width, pos, convert_uint (pos, record->format->big_endian, field->width));
#endif
return convert_uint (pos, record->format->big_endian, field->width);
}
void
bin_record_index (BinRecord *record,
int index)
{
gsize format_size = bin_format_get_size (record->format);
record->offset -= record->index * format_size;
record->offset += index * format_size;
record->index = index;
}
gsize
bin_record_get_offset (BinRecord *record)
{
return record->offset;
}
/* Fields */
BinField *
bin_field_new_fixed_array (int n_elements,
int element_size)
{
BinField *field = g_new0 (BinField, 1);
field->width = n_elements * element_size;
field->align = element_size;
return field;
return convert_uint (pos, parser->endian, field->type);
}

View File

@ -17,52 +17,98 @@
*/
#include <glib.h>
typedef struct BinField BinField;
typedef struct BinFormat BinFormat;
typedef struct BinParser BinParser;
typedef struct BinRecord BinRecord;
typedef struct BinField BinField;
/* The model is:
*
* BinParser has an offset associated with it. This offset can be
* manipulated with methods
*
* goto - go to absolute position from file start
* goto_rel - go to relative position
* goto_record_rel - skip the given number of records
* align - move forward until aligned to given width
* save/restore - save/restore the current offset (stack)
*
* and queried with
*
* get_offset - return current offset in bytes from start
*
* data can be retrieved with
*
* get_uint - return a uint of given width, and skip
* get_string - return a null terminated string, and skip
* get_pstring - return a 'pascal' string with given length
*
* get_uint_field - return the named field
*
* formats should probably be definable as static data.
*
* A bin parser also has an associated "status" with it. This can be
* OK, or error. It is ok to use a parser with an error status, but
* the data returned will not be meaningful.
*
*
*/
#define BIN_MAX_NAME 52
/* Byte order a parser uses when decoding multi-byte unsigned integers.
 * Selected per parser with bin_parser_set_endian().
 */
typedef enum
{
BIN_LITTLE_ENDIAN,
BIN_BIG_ENDIAN,
BIN_NATIVE_ENDIAN
} BinEndian;
/* Type of a field or of a value read from the parser.
 *
 * The BIN_UINT* types are unsigned integers of 8, 16, 32 and 64 bits.
 * BIN_UNINTERPRETED is a run of raw bytes whose length is given by the
 * n_bytes member of the BinField describing it.
 */
typedef enum
{
/* More types can (and probably will) be added in the future */
BIN_UINT8,
BIN_UINT16,
BIN_UINT32,
BIN_UINT64,
BIN_UNINTERPRETED
} BinType;
/* Description of one field of a record, as passed to
 * bin_parser_create_record().  Descriptions are given as an array whose
 * last entry has an empty name.
 */
struct BinField {
/* field name; an empty string marks the end of the array */
const char name[BIN_MAX_NAME];
/* one of the BinType values */
char type;
/* NOTE(review): stored in a plain char, so large byte counts may not
 * be representable -- confirm the intended range.
 */
char n_bytes; /* number of bytes if type
* is UNINTERPRETED */
};
/* BinParser */
BinParser * bin_parser_new (const guchar *data,
gsize length);
void bin_parser_free (BinParser *parser);
const guchar *bin_parser_get_data (BinParser *parser);
gsize bin_parser_get_length (BinParser *parser);
gsize bin_parser_get_offset (BinParser *parser);
void bin_parser_align (BinParser *parser,
gsize byte_width);
void bin_parser_goto (BinParser *parser,
gsize offset);
void bin_parser_set_endian (BinParser *parser,
BinEndian endian);
gboolean bin_parser_error (BinParser *parser);
void bin_parser_clear_error (BinParser *parser);
const gchar * bin_parser_get_error_msg (BinParser *parser);
BinRecord * bin_parser_create_record (BinParser *parser,
const BinField *fields);
gsize bin_record_get_size (BinRecord *record);
/* Move current offset */
gsize bin_parser_get_offset (BinParser *parser);
void bin_parser_set_offset (BinParser *parser,
gsize offset);
void bin_parser_align (BinParser *parser,
gsize byte_width);
void bin_parser_seek_record (BinParser *parser,
BinRecord *record,
int n_records);
void bin_parser_save (BinParser *parser);
void bin_parser_restore (BinParser *parser);
/* retrieve data */
guint64 bin_parser_get_uint (BinParser *parser,
BinType type);
const char * bin_parser_get_string (BinParser *parser);
guint32 bin_parser_get_uint32 (BinParser *parser);
/* Record */
BinRecord * bin_parser_get_record (BinParser *parser,
BinFormat *format,
gsize offset);
void bin_record_free (BinRecord *record);
guint64 bin_record_get_uint (BinRecord *record,
const char *name);
void bin_record_index (BinRecord *record,
int index);
gsize bin_record_get_offset (BinRecord *record);
const gchar *bin_record_get_string_indirect (BinRecord *record,
const char *name,
gsize str_table);
BinParser * bin_record_get_parser (BinRecord *record);
/* BinFormat */
BinFormat *bin_format_new (gboolean big_endian,
const char *name,
BinField *field,
...);
gsize bin_format_get_size (BinFormat *format);
/* BinField */
BinField *bin_field_new_uint8 (void);
BinField *bin_field_new_uint16 (void);
BinField *bin_field_new_uint32 (void);
BinField *bin_field_new_uint64 (void);
BinField *bin_field_new_fixed_array (int n_elements,
int element_size);
guint64 bin_parser_get_uint_field (BinParser *parser,
BinRecord *record,
const char *field);

View File

@ -44,51 +44,71 @@ struct ElfParser
{
BinParser * parser;
BinFormat * header;
BinFormat * strtab_format;
BinFormat * shn_entry;
BinFormat * sym_format;
BinRecord * header;
BinRecord * strtab_format;
BinRecord * shn_entry;
BinRecord * sym_format;
int n_sections;
Section ** sections;
int n_symbols;
ElfSym * symbols;
gsize sym_strings;
GMappedFile * file;
const Section * text_section;
};
static gboolean parse_elf_signature (const guchar *data, gsize length,
gboolean *is_64, gboolean *is_be);
static void make_formats (ElfParser *parser,
gboolean is_64,
gboolean is_big_endian);
static void make_formats (ElfParser *parser, gboolean is_64);
/* Read the string that field @name of @record refers to.
 *
 * The field is read as an unsigned integer and used as a byte index into
 * the string table starting at offset @str_table.  The parser's offset is
 * saved before and restored after the lookup, so the caller's position is
 * left where it was.
 *
 * Returns whatever bin_parser_get_string() yields at that position.
 */
static const char *
get_string_indirect (BinParser  *parser,
                     BinRecord  *record,
                     const char *name,
                     gsize       str_table)
{
    gsize str_index;
    const char *str;

    bin_parser_save (parser);

    str_index = bin_parser_get_uint_field (parser, record, name);
    bin_parser_set_offset (parser, str_table + str_index);
    str = bin_parser_get_string (parser);

    bin_parser_restore (parser);

    return str;
}
static Section *
section_new (BinRecord *record,
section_new (BinParser *parser,
BinRecord *record,
gsize name_table)
{
Section *section = g_new (Section, 1);
guint64 flags;
section->name = bin_record_get_string_indirect (
record, "sh_name", name_table);
section->size = bin_record_get_uint (record, "sh_size");
section->offset = bin_record_get_uint (record, "sh_offset");
flags = bin_record_get_uint (record, "sh_flags");
section->name = get_string_indirect (parser, record, "sh_name", name_table);
section->size = bin_parser_get_uint_field (parser, record, "sh_size");
section->offset = bin_parser_get_uint_field (parser, record, "sh_offset");
flags = bin_parser_get_uint_field (parser, record, "sh_flags");
section->allocated = !!(flags & SHF_ALLOC);
if (section->allocated)
section->load_address = bin_record_get_uint (record, "sh_addr");
section->load_address = bin_parser_get_uint_field (parser, record, "sh_addr");
else
section->load_address = 0;
section->type = bin_record_get_uint (record, "sh_type");
section->type = bin_parser_get_uint_field (parser, record, "sh_type");
return section;
}
@ -104,7 +124,7 @@ find_section (ElfParser *parser,
guint type)
{
int i;
for (i = 0; i < parser->n_sections; ++i)
{
Section *section = parser->sections[i];
@ -112,7 +132,7 @@ find_section (ElfParser *parser,
if (strcmp (section->name, name) == 0 && section->type == type)
return section;
}
return NULL;
}
@ -126,7 +146,6 @@ elf_parser_new_from_data (const guchar *data,
gsize section_names;
gsize section_headers;
int i;
BinRecord *elf_header, *shn_entry;
if (!parse_elf_signature (data, length, &is_64, &is_big_endian))
{
@ -138,41 +157,47 @@ elf_parser_new_from_data (const guchar *data,
parser->parser = bin_parser_new (data, length);
make_formats (parser, is_64, is_big_endian);
if (is_big_endian)
bin_parser_set_endian (parser->parser, BIN_BIG_ENDIAN);
else
bin_parser_set_endian (parser->parser, BIN_LITTLE_ENDIAN);
make_formats (parser, is_64);
/* Read ELF header */
elf_header = bin_parser_get_record (parser->parser, parser->header, 0);
bin_parser_set_offset (parser->parser, 0);
parser->n_sections = bin_record_get_uint (elf_header, "e_shnum");
section_names_idx = bin_record_get_uint (elf_header, "e_shstrndx");
section_headers = bin_record_get_uint (elf_header, "e_shoff");
bin_record_free (elf_header);
parser->n_sections = bin_parser_get_uint_field (parser->parser, parser->header, "e_shnum");
section_names_idx = bin_parser_get_uint_field (parser->parser, parser->header, "e_shstrndx");
section_headers = bin_parser_get_uint_field (parser->parser, parser->header, "e_shoff");
/* Read section headers */
parser->sections = g_new0 (Section *, parser->n_sections);
shn_entry = bin_parser_get_record (parser->parser,
parser->shn_entry, section_headers);
bin_record_index (shn_entry, section_names_idx);
section_names = bin_record_get_uint (shn_entry, "sh_offset");
bin_parser_set_offset (parser->parser, section_headers);
bin_parser_seek_record (parser->parser, parser->shn_entry,
section_names_idx);
section_names = bin_parser_get_uint_field (parser->parser, parser->shn_entry, "sh_offset");
for (i = 0; i < parser->n_sections; ++i)
{
bin_record_index (shn_entry, i);
bin_parser_set_offset (parser->parser, section_headers);
bin_parser_seek_record (parser->parser, parser->shn_entry, i);
parser->sections[i] = section_new (shn_entry, section_names);
parser->sections[i] = section_new (parser->parser,
parser->shn_entry,
section_names);
}
/* Cache the text section */
parser->text_section = find_section (parser, ".text", SHT_PROGBITS);
if (!parser->text_section)
parser->text_section = find_section (parser, ".text", SHT_NOBITS);
bin_record_free (shn_entry);
return parser;
}
@ -185,23 +210,23 @@ elf_parser_new (const char *filename,
ElfParser *parser;
GMappedFile *file = g_mapped_file_new (filename, FALSE, NULL);
if (!file)
return NULL;
#if 0
g_print ("elf parser new : %s\n", filename);
#endif
data = (guchar *)g_mapped_file_get_contents (file);
length = g_mapped_file_get_length (file);
#if 0
g_print ("data %p: for %s\n", data, filename);
#endif
parser = elf_parser_new_from_data (data, length);
if (!parser)
{
g_mapped_file_free (file);
@ -209,14 +234,14 @@ elf_parser_new (const char *filename,
}
parser->file = file;
#if 0
g_print ("Elf file: %s (debug: %s)\n",
filename, elf_parser_get_debug_link (parser, NULL));
#endif
parser->file = file;
#if 0
if (!parser->symbols)
g_print ("at this point %s has no symbols\n", filename);
@ -277,15 +302,15 @@ elf_parser_get_crc32 (ElfParser *parser)
gsize length;
gulong crc;
gsize i;
data = bin_parser_get_data (parser->parser);
length = bin_parser_get_length (parser->parser);
crc = 0xffffffff;
for (i = 0; i < length; ++i)
crc = crc32_table[(crc ^ data[i]) & 0xff] ^ (crc >> 8);
/* We just read the entire file into memory, but we only really
* need the symbol table, so swap the whole thing out.
*
@ -301,14 +326,14 @@ void
elf_parser_free (ElfParser *parser)
{
int i;
for (i = 0; i < parser->n_sections; ++i)
section_free (parser->sections[i]);
g_free (parser->sections);
if (parser->file)
g_mapped_file_free (parser->file);
bin_parser_free (parser->parser);
g_free (parser);
@ -323,7 +348,7 @@ elf_demangle (const char *name)
#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
char *demangled = sysprof_cplus_demangle (name, DMGL_PARAMS | DMGL_ANSI);
if (demangled)
return demangled;
else
@ -338,7 +363,7 @@ compare_sym (const void *a, const void *b)
{
const ElfSym *sym_a = a;
const ElfSym *sym_b = b;
if (sym_a->address < sym_b->address)
return -1;
else if (sym_a->address == sym_b->address)
@ -352,11 +377,11 @@ static void
dump_symbols (ElfParser *parser, ElfSym *syms, guint n_syms)
{
int i;
for (i = 0; i < n_syms; ++i)
{
ElfSym *s = &(syms[i]);
g_print (" %s: %lx\n", elf_parser_get_sym_name (parser, s), s->address);
}
}
@ -367,28 +392,32 @@ read_table (ElfParser *parser,
const Section *sym_table,
const Section *str_table)
{
int sym_size = bin_format_get_size (parser->sym_format);
int sym_size = bin_record_get_size (parser->sym_format);
int i;
int n_functions;
BinRecord *symbol;
parser->n_symbols = sym_table->size / sym_size;
parser->symbols = g_new (ElfSym, parser->n_symbols);
symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym_table->offset);
#if 0
g_print ("sym table offset: %d\n", sym_table->offset);
#endif
bin_parser_set_offset (parser->parser, sym_table->offset);
n_functions = 0;
#if 0
g_print ("n syms: %d\n", parser->n_symbols);
#endif
for (i = 0; i < parser->n_symbols; ++i)
{
guint info;
gulong addr;
gulong offset;
bin_record_index (symbol, i);
info = bin_record_get_uint (symbol, "st_info");
addr = bin_record_get_uint (symbol, "st_value");
offset = bin_record_get_offset (symbol);
info = bin_parser_get_uint_field (parser->parser, parser->sym_format, "st_info");
addr = bin_parser_get_uint_field (parser->parser, parser->sym_format, "st_value");
offset = bin_parser_get_offset (parser->parser);
if (addr != 0 &&
(info & 0xf) == STT_FUNC &&
@ -399,15 +428,22 @@ read_table (ElfParser *parser,
parser->symbols[n_functions].offset = offset;
n_functions++;
}
}
#if 0
g_print ("read symbol: %s\n", get_string_indirect (parser->parser,
parser->sym_format, "st_name",
str_table->offset));
#endif
bin_parser_seek_record (parser->parser, parser->sym_format, 1);
}
bin_record_free (symbol);
parser->sym_strings = str_table->offset;
parser->n_symbols = n_functions;
parser->symbols = g_renew (ElfSym, parser->symbols, parser->n_symbols);
qsort (parser->symbols, parser->n_symbols, sizeof (ElfSym), compare_sym);
}
@ -418,7 +454,7 @@ read_symbols (ElfParser *parser)
const Section *strtab = find_section (parser, ".strtab", SHT_STRTAB);
const Section *dynsym = find_section (parser, ".dynsym", SHT_DYNSYM);
const Section *dynstr = find_section (parser, ".dynstr", SHT_STRTAB);
if (symtab && strtab)
{
read_table (parser, symtab, strtab);
@ -451,10 +487,10 @@ do_lookup (ElfSym *symbols,
{
if (address >= symbols[last].address)
return &(symbols[last]);
last--;
}
return NULL;
}
else
@ -476,38 +512,43 @@ elf_parser_lookup_symbol (ElfParser *parser,
const ElfSym *result;
if (!parser->symbols)
{
#if 0
g_print ("reading symbols\n");
#endif
read_symbols (parser);
}
if (parser->n_symbols == 0)
return NULL;
if (!parser->text_section)
return NULL;
address += parser->text_section->load_address;
#if 0
g_print ("the address we are looking up is %p\n", address);
#endif
result = do_lookup (parser->symbols, address, 0, parser->n_symbols - 1);
#if 0
if (result)
{
g_print ("found %s at %lx\n", elf_parser_get_sym_name (parser, result), result->address);
}
#endif
if (result)
{
gulong size;
BinRecord *record;
record = bin_parser_get_record (parser->parser, parser->sym_format, result->offset);
size = bin_record_get_uint (record, "st_size");
bin_record_free (record);
bin_parser_set_offset (parser->parser, result->offset);
size = bin_parser_get_uint_field (parser->parser,
parser->sym_format, "st_size");
if (result->address + size <= address)
result = NULL;
}
@ -519,10 +560,10 @@ gulong
elf_parser_get_text_offset (ElfParser *parser)
{
g_return_val_if_fail (parser != NULL, (gulong)-1);
if (!parser->text_section)
return (gulong)-1;
return parser->text_section->offset;
}
@ -532,18 +573,18 @@ elf_parser_get_debug_link (ElfParser *parser, guint32 *crc32)
const Section *debug_link = find_section (parser, ".gnu_debuglink",
SHT_PROGBITS);
const gchar *result;
if (!debug_link)
return NULL;
bin_parser_goto (parser->parser, debug_link->offset);
bin_parser_set_offset (parser->parser, debug_link->offset);
result = bin_parser_get_string (parser->parser);
bin_parser_align (parser->parser, 4);
if (crc32)
*crc32 = bin_parser_get_uint32 (parser->parser);
*crc32 = bin_parser_get_uint (parser->parser, BIN_UINT32);
return result;
}
@ -552,7 +593,7 @@ const guchar *
elf_parser_get_eh_frame (ElfParser *parser)
{
const Section *eh_frame = find_section (parser, ".eh_frame", SHT_PROGBITS);
if (eh_frame)
return bin_parser_get_data (parser->parser) + eh_frame->offset;
else
@ -564,16 +605,12 @@ elf_parser_get_sym_name (ElfParser *parser,
const ElfSym *sym)
{
const char *result;
BinRecord *symbol;
g_return_val_if_fail (parser != NULL, NULL);
symbol = bin_parser_get_record (parser->parser, parser->sym_format, sym->offset);
result = bin_record_get_string_indirect (symbol, "st_name",
parser->sym_strings);
bin_record_free (symbol);
g_return_val_if_fail (parser != NULL, NULL);
bin_parser_set_offset (parser->parser, sym->offset);
result = get_string_indirect (
parser->parser, parser->sym_format, "st_name", parser->sym_strings);
return result;
}
@ -616,81 +653,127 @@ parse_elf_signature (const guchar *data,
}
if (is_64)
*is_64 = (EI_CLASS == ELFCLASS64);
*is_64 = (data[EI_CLASS] == ELFCLASS64);
if (is_be)
*is_be = (EI_DATA == ELFDATA2MSB);
*is_be = (data[EI_DATA] == ELFDATA2MSB);
return TRUE;
}
static BinField *
make_word (gboolean is_64)
static void
get_formats (gboolean is_64,
const BinField **elf_header,
const BinField **shn_entry,
const BinField **sym_format)
{
/* Field layout of the ELF file header for 64-bit objects. Names and
 * widths follow Elf64_Ehdr: e_entry, e_phoff and e_shoff are 64 bits wide.
 * Handed back through get_formats() when is_64 is TRUE.
 */
static const BinField elf64_header[] = {
/* presumably EI_NIDENT raw identification bytes -- confirm against binparser */
{ "e_ident", BIN_UNINTERPRETED, EI_NIDENT },
{ "e_type", BIN_UINT16 },
{ "e_machine", BIN_UINT16 },
{ "e_version", BIN_UINT32 },
{ "e_entry", BIN_UINT64 },
{ "e_phoff", BIN_UINT64 },
{ "e_shoff", BIN_UINT64 },
{ "e_flags", BIN_UINT32 },
{ "e_ehsize", BIN_UINT16 },
{ "e_phentsize", BIN_UINT16 },
{ "e_phnum", BIN_UINT16 },
{ "e_shentsize", BIN_UINT16 },
{ "e_shnum", BIN_UINT16 },
{ "e_shstrndx", BIN_UINT16 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" },
};
/* Field layout of the ELF file header for 32-bit objects. Names and
 * widths follow Elf32_Ehdr: same field order as the 64-bit table, but
 * e_entry, e_phoff and e_shoff are 32 bits wide.
 * Handed back through get_formats() when is_64 is FALSE.
 */
static const BinField elf32_header[] = {
/* presumably EI_NIDENT raw identification bytes -- confirm against binparser */
{ "e_ident", BIN_UNINTERPRETED, EI_NIDENT },
{ "e_type", BIN_UINT16 },
{ "e_machine", BIN_UINT16 },
{ "e_version", BIN_UINT32 },
{ "e_entry", BIN_UINT32 },
{ "e_phoff", BIN_UINT32 },
{ "e_shoff", BIN_UINT32 },
{ "e_flags", BIN_UINT32 },
{ "e_ehsize", BIN_UINT16 },
{ "e_phentsize", BIN_UINT16 },
{ "e_phnum", BIN_UINT16 },
{ "e_shentsize", BIN_UINT16 },
{ "e_shnum", BIN_UINT16 },
{ "e_shstrndx", BIN_UINT16 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" },
};
/* Field layout of one section header entry for 64-bit objects. Names
 * and widths follow Elf64_Shdr: sh_flags, sh_addr, sh_offset, sh_size,
 * sh_addralign and sh_entsize are 64 bits wide, the rest 32 bits.
 * Handed back through get_formats() when is_64 is TRUE.
 */
static const BinField shn64_entry[] = {
{ "sh_name", BIN_UINT32 },
{ "sh_type", BIN_UINT32 },
{ "sh_flags", BIN_UINT64 },
{ "sh_addr", BIN_UINT64 },
{ "sh_offset", BIN_UINT64 },
{ "sh_size", BIN_UINT64 },
{ "sh_link", BIN_UINT32 },
{ "sh_info", BIN_UINT32 },
{ "sh_addralign", BIN_UINT64 },
{ "sh_entsize", BIN_UINT64 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" }
};
/* Field layout of one section header entry for 32-bit objects. Names
 * follow Elf32_Shdr: same field order as the 64-bit table, with every
 * field 32 bits wide.
 * Handed back through get_formats() when is_64 is FALSE.
 */
static const BinField shn32_entry[] = {
{ "sh_name", BIN_UINT32 },
{ "sh_type", BIN_UINT32 },
{ "sh_flags", BIN_UINT32 },
{ "sh_addr", BIN_UINT32 },
{ "sh_offset", BIN_UINT32 },
{ "sh_size", BIN_UINT32 },
{ "sh_link", BIN_UINT32 },
{ "sh_info", BIN_UINT32 },
{ "sh_addralign", BIN_UINT32 },
{ "sh_entsize", BIN_UINT32 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" }
};
/* Field layout of one symbol table entry for 64-bit objects. Names and
 * widths follow Elf64_Sym: the small st_info/st_other/st_shndx fields
 * come right after st_name, and st_value and st_size are 64 bits wide.
 * Handed back through get_formats() when is_64 is TRUE.
 */
static const BinField sym64_format[] = {
{ "st_name", BIN_UINT32 },
{ "st_info", BIN_UINT8 },
{ "st_other", BIN_UINT8 },
{ "st_shndx", BIN_UINT16 },
{ "st_value", BIN_UINT64 },
{ "st_size", BIN_UINT64 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" }
};
/* Field layout of one symbol table entry for 32-bit objects. Names and
 * widths follow Elf32_Sym. The field ORDER differs from the 64-bit
 * table: st_value and st_size (32 bits each) come directly after
 * st_name, ahead of st_info/st_other/st_shndx.
 * Handed back through get_formats() when is_64 is FALSE.
 */
static const BinField sym32_format[] = {
{ "st_name", BIN_UINT32 },
{ "st_value", BIN_UINT32 },
{ "st_size", BIN_UINT32 },
{ "st_info", BIN_UINT8 },
{ "st_other", BIN_UINT8 },
{ "st_shndx", BIN_UINT16 },
/* empty name: presumably the end-of-list marker -- confirm against binparser */
{ "" },
};
if (is_64)
return bin_field_new_uint64 ();
{
*elf_header = elf64_header;
*shn_entry = shn64_entry;
*sym_format = sym64_format;
}
else
return bin_field_new_uint32 ();
{
*elf_header = elf32_header;
*shn_entry = shn32_entry;
*sym_format = sym32_format;
}
}
static void
make_formats (ElfParser *parser, gboolean is_64, gboolean is_big_endian)
make_formats (ElfParser *parser, gboolean is_64)
{
parser->header = bin_format_new (
is_big_endian,
"e_ident", bin_field_new_fixed_array (EI_NIDENT, 1),
"e_type", bin_field_new_uint16 (),
"e_machine", bin_field_new_uint16 (),
"e_version", bin_field_new_uint32 (),
"e_entry", make_word (is_64),
"e_phoff", make_word (is_64),
"e_shoff", make_word (is_64),
"e_flags", bin_field_new_uint32 (),
"e_ehsize", bin_field_new_uint16 (),
"e_phentsize", bin_field_new_uint16 (),
"e_phnum", bin_field_new_uint16 (),
"e_shentsize", bin_field_new_uint16 (),
"e_shnum", bin_field_new_uint16 (),
"e_shstrndx", bin_field_new_uint16 (),
NULL);
parser->shn_entry = bin_format_new (
is_big_endian,
"sh_name", bin_field_new_uint32 (),
"sh_type", bin_field_new_uint32 (),
"sh_flags", make_word (is_64),
"sh_addr", make_word (is_64),
"sh_offset", make_word (is_64),
"sh_size", make_word (is_64),
"sh_link", bin_field_new_uint32 (),
"sh_info", bin_field_new_uint32 (),
"sh_addralign", make_word (is_64),
"sh_entsize", make_word (is_64),
NULL);
if (is_64)
{
parser->sym_format = bin_format_new (
is_big_endian,
"st_name", bin_field_new_uint32 (),
"st_info", bin_field_new_uint8 (),
"st_other", bin_field_new_uint8 (),
"st_shndx", bin_field_new_uint16 (),
"st_value", bin_field_new_uint64 (),
"st_size", bin_field_new_uint64 (),
NULL);
}
else
{
parser->sym_format = bin_format_new (
is_big_endian,
"st_name", bin_field_new_uint32 (),
"st_value", bin_field_new_uint32 (),
"st_size", bin_field_new_uint32 (),
"st_info", bin_field_new_uint8 (),
"st_other", bin_field_new_uint8 (),
"st_shndx", bin_field_new_uint16 (),
NULL);
}
const BinField *elf_header, *shn_entry, *sym_format;
get_formats (is_64, &elf_header, &shn_entry, &sym_format);
parser->header = bin_parser_create_record (parser->parser, elf_header);
parser->shn_entry = bin_parser_create_record (parser->parser, shn_entry);
parser->sym_format = bin_parser_create_record (parser->parser, sym_format);
}