Remove final bits of binparser

This commit is contained in:
Søren Sandmann Pedersen
2009-10-08 19:25:54 -04:00
parent ca14021fb3
commit 7836e72295
5 changed files with 46 additions and 725 deletions

View File

@ -10,8 +10,6 @@ endif
SYSPROF_CORE = \
binfile.h \
binfile.c \
binparser.h \
binparser.c \
collector.c \
collector.h \
demangle.c \
@ -108,8 +106,6 @@ testunwind_SOURCES = \
demangle.c \
elfparser.c \
elfparser.h \
binparser.c \
binparser.h \
unwind.c \
unwind.h
testunwind_CPPFLAGS = $(CORE_DEP_CFLAGS)
@ -120,8 +116,7 @@ testelf_SOURCES = \
testelf.c \
demangle.c \
elfparser.c \
elfparser.h \
binparser.c \
binparser.h
elfparser.h
testelf_CPPFLAGS = $(CORE_DEP_CFLAGS)
testelf_LDADD = $(CORE_DEP_LIBS)

View File

@ -1,384 +0,0 @@
/* Sysprof -- Sampling, systemwide CPU profiler
* Copyright 2006, 2007, Soeren Sandmann
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#include <string.h>
#include "binparser.h"
/* Forward declaration; the layout of Field is defined further down. */
typedef struct Field Field;
/* Parser state: a read cursor over a byte buffer supplied by the caller.
 * The buffer is only referenced, never copied or freed by this module.
 */
struct BinParser
{
const guchar * data; /* buffer being parsed, as passed to bin_parser_new() */
gsize length; /* size of data in bytes, as passed to bin_parser_new() */
gsize offset; /* current read position, in bytes from the start of data */
const char * error_msg; /* NULL while no error is recorded (see bin_parser_error()) */
GList * records; /* BinRecords made by bin_parser_create_record(); freed in bin_parser_free() */
BinEndian endian; /* byte order applied when decoding integers */
gsize saved_offset; /* offset stored by bin_parser_save() for bin_parser_restore() */
};
/* Internal description of one field inside a record layout, built from a
 * caller-provided BinField by bin_parser_create_record().
 */
struct Field
{
char name[BIN_MAX_NAME]; /* field name; matched with strcmp() in get_field() */
guint offset; /* from beginning of struct */
guint width; /* size of the field in bytes (copied from BinField.n_bytes) */
BinType type; /* copied from BinField.type */
};
/* A record layout: a counted array of fields.
 *
 * bin_parser_create_record() allocates room for n_fields entries, so the
 * 'fields' array really holds n_fields elements even though it is declared
 * with a length of one.
 */
struct BinRecord
{
int n_fields; /* number of valid entries in fields[] */
Field fields[1]; /* first of n_fields entries; storage is over-allocated */
};
/* Create a parser that reads from the 'length' bytes starting at 'data'.
 *
 * The buffer is referenced, not copied, and is not freed by
 * bin_parser_free(). The new parser starts at offset 0, with no error
 * recorded, no records and native byte order.
 */
BinParser *
bin_parser_new (const guchar *data,
                gsize         length)
{
    /* g_new0() zero-fills the structure, so saved_offset starts at 0. */
    BinParser *self = g_new0 (BinParser, 1);

    self->endian = BIN_NATIVE_ENDIAN;
    self->records = NULL;
    self->error_msg = NULL;
    self->offset = 0;
    self->length = length;
    self->data = data;

    return self;
}
/* Destroy 'parser' together with every record created through
 * bin_parser_create_record(). The data buffer itself is left untouched.
 */
void
bin_parser_free (BinParser *parser)
{
    GList *node = parser->records;

    /* Release each record first, then the list cells that held them. */
    while (node != NULL)
    {
        g_free (node->data);
        node = node->next;
    }

    g_list_free (parser->records);
    g_free (parser);
}
/* Return the buffer the parser was created with. */
const guchar *
bin_parser_get_data (BinParser *parser)
{
    const guchar *buffer = parser->data;

    return buffer;
}
/* Return the size, in bytes, of the buffer the parser was created with. */
gsize
bin_parser_get_length (BinParser *parser)
{
    gsize n_bytes = parser->length;

    return n_bytes;
}
/* Round 'offset' up to the next multiple of 'alignment'.
 *
 * An alignment of one or less imposes no constraint, so 'offset' is
 * returned unchanged. This also keeps a zero alignment (for example a
 * field declared with n_bytes == 0) from dividing by zero.
 *
 * Note that this could be sped up by assuming 'alignment' is a power of
 * two, since then
 *
 *     offset % alignment == offset & (alignment - 1)
 */
static guint64
align (guint64 offset, int alignment)
{
    guint64 remainder;

    if (alignment <= 1)
        return offset;

    remainder = offset % (guint64) alignment;
    if (remainder != 0)
        offset += (guint64) alignment - remainder;

    return offset;
}
/* Alignment required by 'field': uninterpreted data can start at any byte,
 * every other field is aligned to its own size.
 */
static int
get_align (const BinField *field)
{
    return (field->type == BIN_UNINTERPRETED) ? 1 : field->n_bytes;
}
/* Build a record layout from 'fields', an array terminated by an entry
 * whose name is the empty string.
 *
 * Each field is placed at the next offset that satisfies get_align().
 * The record is added to the parser's record list and is released by
 * bin_parser_free().
 */
BinRecord *
bin_parser_create_record (BinParser      *parser,
                          const BinField *fields)
{
    BinRecord *record;
    int i, n_fields;
    guint offset;

    /* Count the entries that precede the empty-name terminator. */
    for (n_fields = 0; fields[n_fields].name[0] != '\0'; n_fields++)
        ;

    /* BinRecord already contains one Field, hence the "- 1". */
    record = g_malloc0 (sizeof (BinRecord) +
                        (n_fields - 1) * sizeof (Field));
    record->n_fields = n_fields;

    offset = 0;
    for (i = 0; i < n_fields; i++)
    {
        const BinField *src = fields + i;
        Field *dst = record->fields + i;

        offset = align (offset, get_align (src));

        /* The record is zero-filled and at most BIN_MAX_NAME - 1 bytes
         * are copied, so the name always stays NUL-terminated.
         */
        strncpy (dst->name, src->name, BIN_MAX_NAME - 1);
        dst->offset = offset;
        dst->type = src->type;
        dst->width = src->n_bytes;

        offset += dst->width;
    }

    parser->records = g_list_prepend (parser->records, record);

    return record;
}
/* Return TRUE when an error message is currently recorded on 'parser'. */
gboolean
bin_parser_error (BinParser *parser)
{
    if (parser->error_msg == NULL)
        return FALSE;

    return TRUE;
}
/* Forget any recorded error, so bin_parser_error() returns FALSE again. */
void
bin_parser_clear_error (BinParser *parser)
{
    parser->error_msg = NULL;
}
/* Return the recorded error message, or NULL when there is none. */
const gchar *
bin_parser_get_error_msg (BinParser *parser)
{
    const gchar *message = parser->error_msg;

    return message;
}
/* Select the byte order used when integers are decoded from the data. */
void
bin_parser_set_endian (BinParser *parser,
                       BinEndian  endian)
{
    parser->endian = endian;
}
/* Move current offset */
/* Return the current read position, in bytes from the start of the data. */
gsize
bin_parser_get_offset (BinParser *parser)
{
    gsize position = parser->offset;

    return position;
}
/* Move the read position to 'offset' bytes from the start of the data.
 * The value is stored as given; it is not checked against the data length.
 */
void
bin_parser_set_offset (BinParser *parser,
                       gsize      offset)
{
    parser->offset = offset;
}
/* Advance the read position to the next multiple of 'byte_width'
 * (it stays put when it is already aligned).
 */
void
bin_parser_align (BinParser *parser,
                  gsize      byte_width)
{
    gsize aligned = align (parser->offset, byte_width);

    parser->offset = aligned;
}
/* Return the size in bytes of one instance of 'record', including the
 * padding needed before another instance of the same record can follow.
 *
 * A record without fields has size 0.
 */
gsize
bin_record_get_size (BinRecord *record)
{
    const Field *first_field;
    const Field *last_field;
    int alignment;

    if (record->n_fields <= 0)
        return 0;

    first_field = &(record->fields[0]);
    last_field = &(record->fields[record->n_fields - 1]);

    /* Align to the first field, since that's the alignment of the record
     * following this one. Use the same rule as get_align(): uninterpreted
     * data has no alignment requirement, whatever its width. A zero width
     * is treated the same way so it can never be used as a divisor.
     */
    if (first_field->type == BIN_UNINTERPRETED || first_field->width == 0)
        alignment = 1;
    else
        alignment = first_field->width;

    return align (last_field->offset + last_field->width, alignment);
}
/* Move the read position forward by 'n_records' instances of 'record'. */
void
bin_parser_seek_record (BinParser *parser,
                        BinRecord *record,
                        int        n_records)
{
    parser->offset += bin_record_get_size (record) * n_records;
}
/* Remember the current read position for a later bin_parser_restore().
 * Only one position is kept; saving again overwrites the previous one.
 */
void
bin_parser_save (BinParser *parser)
{
    gsize current = parser->offset;

    parser->saved_offset = current;
}
/* Return to the read position stored by the last bin_parser_save(). */
void
bin_parser_restore (BinParser *parser)
{
    gsize remembered = parser->saved_offset;

    parser->offset = remembered;
}
/* retrieve data */
/* Decode the unsigned integer of 'width' bytes (1, 2, 4 or 8) stored at
 * 'data' and return it widened to 64 bits.
 *
 * 'endian' gives the byte order of the stored value; with
 * BIN_NATIVE_ENDIAN no swapping is performed. Any other width is a
 * programming error and trips an assertion.
 *
 * The bytes are fetched with memcpy() rather than through a cast pointer,
 * because 'data' may point anywhere in the file and need not be suitably
 * aligned for a wider integer type.
 */
static guint64
convert_uint (const guchar *data,
              BinEndian     endian,
              int           width)
{
    guint16 r16;
    guint32 r32;
    guint64 r64;

    /* FIXME: check that we are within the file */
    switch (width)
    {
    case 1:
        return data[0];

    case 2:
        memcpy (&r16, data, sizeof r16);
        if (endian == BIN_BIG_ENDIAN)
            r16 = GUINT16_FROM_BE (r16);
        else if (endian == BIN_LITTLE_ENDIAN)
            r16 = GUINT16_FROM_LE (r16);
        return r16;

    case 4:
        memcpy (&r32, data, sizeof r32);
        if (endian == BIN_BIG_ENDIAN)
            r32 = GUINT32_FROM_BE (r32);
        else if (endian == BIN_LITTLE_ENDIAN)
            r32 = GUINT32_FROM_LE (r32);
        return r32;

    case 8:
        memcpy (&r64, data, sizeof r64);
        if (endian == BIN_BIG_ENDIAN)
            r64 = GUINT64_FROM_BE (r64);
        else if (endian == BIN_LITTLE_ENDIAN)
            r64 = GUINT64_FROM_LE (r64);
        return r64;

    default:
        g_assert_not_reached();
        return 0;
    }
}
/* Read an unsigned integer of 'width' bytes at the current position and
 * advance the position by 'width'.
 *
 * When the integer would extend past the end of the data, nothing is
 * read: an error is recorded on the parser (see bin_parser_error()) and
 * 0 is returned. The position is still advanced, so a caller stepping
 * through a structure stays in step.
 */
guint64
bin_parser_get_uint (BinParser *parser,
                     int        width)
{
    guint64 value = 0;

    /* Non-positive widths are left to convert_uint(), which asserts. */
    if (width > 0 &&
        (parser->offset > parser->length ||
         (gsize) width > parser->length - parser->offset))
    {
        parser->error_msg = "Attempt to read an integer past the end of the data";
    }
    else
    {
        value = convert_uint (parser->data + parser->offset,
                              parser->endian, width);
    }

    parser->offset += width;

    return value;
}
/* Return the NUL-terminated string at the current position and advance
 * the position past its terminator. The returned pointer refers into the
 * parser's data; it is not a copy.
 *
 * When the current position is outside the data, or no terminator is
 * found before the end of the data, an error is recorded on the parser
 * (see bin_parser_error()), the position is left unchanged and an empty
 * string is returned.
 */
const char *
bin_parser_get_string (BinParser *parser)
{
    const char *result;
    const char *terminator;
    gsize remaining;

    if (parser->offset >= parser->length)
    {
        parser->error_msg = "String starts past the end of the data";
        return "";
    }

    result = (const char *)parser->data + parser->offset;
    remaining = parser->length - parser->offset;

    /* Search only the bytes that belong to the data. */
    terminator = memchr (result, '\0', remaining);
    if (terminator == NULL)
    {
        parser->error_msg = "Unterminated string at the end of the data";
        return "";
    }

    parser->offset += (gsize) (terminator - result) + 1;

    return result;
}
/* Look up the field called 'name' in 'format'.
 * Returns NULL when the record has no field with that name.
 */
static const Field *
get_field (BinRecord   *format,
           const gchar *name)
{
    int idx = 0;

    while (idx < format->n_fields)
    {
        const Field *candidate = format->fields + idx;

        if (!strcmp (candidate->name, name))
            return candidate;

        idx++;
    }

    return NULL;
}
/* Return the value of the field called 'name' in the instance of 'record'
 * that starts at the current position. The position is not moved.
 *
 * When 'record' has no such field, or the field does not lie entirely
 * within the data, an error is recorded on the parser (see
 * bin_parser_error()) and 0 is returned.
 */
guint64
bin_parser_get_uint_field (BinParser  *parser,
                           BinRecord  *record,
                           const char *name)
{
    const Field *field = get_field (record, name);
    gsize available;

    if (field == NULL)
    {
        parser->error_msg = "Record has no field with the requested name";
        return 0;
    }

    if (parser->offset > parser->length)
    {
        parser->error_msg = "Field lies past the end of the data";
        return 0;
    }

    /* Compare by subtraction so that none of the sums can wrap around. */
    available = parser->length - parser->offset;
    if (field->offset > available ||
        field->width > available - field->offset)
    {
        parser->error_msg = "Field lies past the end of the data";
        return 0;
    }

    return convert_uint (parser->data + parser->offset + field->offset,
                         parser->endian, field->width);
}

View File

@ -1,110 +0,0 @@
/* Sysprof -- Sampling, systemwide CPU profiler
* Copyright 2006, 2007, Soeren Sandmann
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
#ifndef BINPARSER_H
#define BINPARSER_H

#include <glib.h>

typedef struct BinParser BinParser;
typedef struct BinRecord BinRecord;
typedef struct BinField BinField;

/* The model is:
 *
 * BinParser has an offset associated with it. This offset can be
 * manipulated with methods
 *
 *	goto		- go to absolute position from file start
 *	goto_rel	- go to relative position
 *	goto_record_rel	- skip the given number of records
 *	align		- move forward until aligned to given width
 *	save/restore	- save/restore the current offset (stack)
 *
 * and queried with
 *
 *	get_offset	- return current offset in bytes from start
 *
 * data can be retrieved with
 *
 *	get_uint	- return a uint of given width, and skip
 *	get_string	- return a null terminated string, and skip
 *	get_pstring	- return a 'pascal' string with given length
 *
 *	get_uint_field	- return the named field
 *
 * formats should probably be definable as static data.
 *
 * A bin parser also has an associated "status" with it. This can be
 * OK, or error. It is ok to use a parser with an error status, but
 * the data returned will not be meaningful.
 *
 * (The operation names above are conceptual; the prototypes below give
 * the functions that are actually declared.)
 */

/* Size of the name buffer of a field, terminating NUL included. */
#define BIN_MAX_NAME 52

/* Byte order of the integers stored in the data. */
typedef enum
{
    BIN_LITTLE_ENDIAN,
    BIN_BIG_ENDIAN,
    BIN_NATIVE_ENDIAN
} BinEndian;

typedef enum
{
    /* More types can (and probably will) be added in the future */
    BIN_UINT,
    BIN_UNINTERPRETED
} BinType;

/* One entry of a record description passed to bin_parser_create_record().
 * A description is an array of these, ended by an entry with an empty name.
 */
struct BinField {
    const char name[BIN_MAX_NAME];
    char type;		/* a BinType value */
    char n_bytes;	/* number of bytes in the type */
};

/* Construction, destruction and basic properties */
BinParser *   bin_parser_new (const guchar *data,
			      gsize length);
void          bin_parser_free (BinParser *parser);
const guchar *bin_parser_get_data (BinParser *parser);
gsize         bin_parser_get_length (BinParser *parser);
void          bin_parser_set_endian (BinParser *parser,
				     BinEndian endian);

/* Error status */
gboolean      bin_parser_error (BinParser *parser);
void          bin_parser_clear_error (BinParser *parser);
const gchar * bin_parser_get_error_msg (BinParser *parser);

/* Record layouts */
BinRecord *   bin_parser_create_record (BinParser *parser,
					const BinField *fields);
gsize         bin_record_get_size (BinRecord *record);
guint64       bin_parser_get_uint_field (BinParser *parser,
					 BinRecord *record,
					 const char *field);

/* Move current offset */
gsize         bin_parser_get_offset (BinParser *parser);
void          bin_parser_set_offset (BinParser *parser,
				     gsize offset);
void          bin_parser_align (BinParser *parser,
				gsize byte_width);
void          bin_parser_seek_record (BinParser *parser,
				      BinRecord *record,
				      int n_records);
void          bin_parser_save (BinParser *parser);
void          bin_parser_restore (BinParser *parser);

/* retrieve data */
guint64       bin_parser_get_uint (BinParser *parser,
				   int width);
const char *  bin_parser_get_string (BinParser *parser);

#endif /* BINPARSER_H */

View File

@ -19,7 +19,6 @@
#include <string.h>
#include <elf.h>
#include <sys/mman.h>
#include "binparser.h"
#include "elfparser.h"
typedef struct Section Section;
@ -43,16 +42,8 @@ struct Section
struct ElfParser
{
BinParser * parser;
BinRecord * header;
BinRecord * strtab_format;
BinRecord * shn_entry;
BinRecord * sym_format;
BinRecord * note_format;
gboolean is_64;
const char * data;
const guchar * data;
gsize length;
int n_sections;
@ -72,10 +63,6 @@ struct ElfParser
const Section * text_section;
};
static gboolean parse_elf_signature (const guchar *data, gsize length,
gboolean *is_64, gboolean *is_be);
static void make_formats (ElfParser *parser, gboolean is_64);
/* FIXME: All of these should in principle do endian swapping,
* but sysprof never has to deal with binaries of a different
* endianness than sysprof itself
@ -99,17 +86,7 @@ static void make_formats (ElfParser *parser, gboolean is_64);
return GET_FIELD (parser, 0, Ehdr, 0, field_name); \
}
MAKE_ELF_UINT_ACCESSOR (e_type)
MAKE_ELF_UINT_ACCESSOR (e_machine)
MAKE_ELF_UINT_ACCESSOR (e_version)
MAKE_ELF_UINT_ACCESSOR (e_entry)
MAKE_ELF_UINT_ACCESSOR (e_phoff)
MAKE_ELF_UINT_ACCESSOR (e_shoff)
MAKE_ELF_UINT_ACCESSOR (e_flags)
MAKE_ELF_UINT_ACCESSOR (e_ehsize)
MAKE_ELF_UINT_ACCESSOR (e_phentsize)
MAKE_ELF_UINT_ACCESSOR (e_phnum)
MAKE_ELF_UINT_ACCESSOR (e_shentsize)
MAKE_ELF_UINT_ACCESSOR (e_shnum)
MAKE_ELF_UINT_ACCESSOR (e_shstrndx)
@ -127,10 +104,6 @@ MAKE_SECTION_HEADER_ACCESSOR (sh_flags);
MAKE_SECTION_HEADER_ACCESSOR (sh_addr);
MAKE_SECTION_HEADER_ACCESSOR (sh_offset);
MAKE_SECTION_HEADER_ACCESSOR (sh_size);
MAKE_SECTION_HEADER_ACCESSOR (sh_link);
MAKE_SECTION_HEADER_ACCESSOR (sh_info);
MAKE_SECTION_HEADER_ACCESSOR (sh_addralign);
MAKE_SECTION_HEADER_ACCESSOR (sh_entsize);
#define MAKE_SYMBOL_ACCESSOR(field_name) \
static uint64_t field_name (ElfParser *parser, gulong offset, gulong nth) \
@ -142,7 +115,6 @@ MAKE_SYMBOL_ACCESSOR(st_name);
MAKE_SYMBOL_ACCESSOR(st_info);
MAKE_SYMBOL_ACCESSOR(st_value);
MAKE_SYMBOL_ACCESSOR(st_size);
MAKE_SYMBOL_ACCESSOR(st_other);
MAKE_SYMBOL_ACCESSOR(st_shndx);
static void
@ -169,6 +141,42 @@ find_section (ElfParser *parser,
return NULL;
}
/* Check the ELF identification bytes at the start of 'data'.
 *
 * Returns FALSE when fewer than EI_NIDENT bytes are available, when the
 * data does not begin with the ELF magic number, or when the class or
 * data-encoding byte holds an unknown value. On success TRUE is returned
 * and, for each of 'is_64' and 'is_be' that is not NULL, the pointed-to
 * flag is set to whether the file is 64-bit, respectively big-endian.
 */
static gboolean
parse_elf_signature (const guchar *data,
                     gsize         length,
                     gboolean     *is_64,
                     gboolean     *is_be)
{
    /* FIXME: this function should be able to return an error */
    if (length < EI_NIDENT)
    {
        /* FIXME set error */
        return FALSE;
    }

    /* Without the magic number this is not an ELF file at all, however
     * plausible the class and encoding bytes may look.
     */
    if (memcmp (data, ELFMAG, SELFMAG) != 0)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_CLASS] != ELFCLASS32 &&
        data[EI_CLASS] != ELFCLASS64)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_DATA] != ELFDATA2LSB &&
        data[EI_DATA] != ELFDATA2MSB)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (is_64)
        *is_64 = (data[EI_CLASS] == ELFCLASS64);

    if (is_be)
        *is_be = (data[EI_DATA] == ELFDATA2MSB);

    return TRUE;
}
ElfParser *
elf_parser_new_from_data (const guchar *data,
gsize length)
@ -176,7 +184,7 @@ elf_parser_new_from_data (const guchar *data,
ElfParser *parser;
gboolean is_64, is_big_endian;
int section_names_idx;
const char *section_names;
const guchar *section_names;
gsize section_headers;
int i;
@ -189,47 +197,24 @@ elf_parser_new_from_data (const guchar *data,
parser = g_new0 (ElfParser, 1);
parser->is_64 = is_64;
parser->data = (const char *)data;
parser->data = data;
parser->length = length;
#if 0
g_print (" new parser : %p\n", parser);
#endif
parser->parser = bin_parser_new (data, length);
if (is_big_endian)
bin_parser_set_endian (parser->parser, BIN_BIG_ENDIAN);
else
bin_parser_set_endian (parser->parser, BIN_LITTLE_ENDIAN);
make_formats (parser, is_64);
/* Read ELF header */
bin_parser_set_offset (parser->parser, 0);
parser->n_sections = e_shnum (parser);
section_names_idx = e_shstrndx (parser);
section_headers = e_shoff (parser);
/* Read section headers */
parser->sections = g_new0 (Section *, parser->n_sections);
bin_parser_set_offset (parser->parser, section_headers);
bin_parser_save (parser->parser);
bin_parser_seek_record (parser->parser, parser->shn_entry,
section_names_idx);
section_names = parser->data + sh_offset (parser, section_names_idx);
bin_parser_restore (parser->parser);
for (i = 0; i < parser->n_sections; ++i)
{
Section *section = g_new (Section, 1);
@ -400,8 +385,6 @@ elf_parser_free (ElfParser *parser)
g_free (parser->symbols);
bin_parser_free (parser->parser);
if (parser->filename)
g_free (parser->filename);
@ -464,7 +447,7 @@ read_table (ElfParser *parser,
const Section *sym_table,
const Section *str_table)
{
int sym_size = bin_record_get_size (parser->sym_format);
int sym_size = GET_SIZE (parser, Sym);
int i, n_symbols;
#if 0
@ -683,7 +666,7 @@ elf_parser_get_text_offset (ElfParser *parser)
}
static gchar *
make_hex_string (const gchar *data, int n_bytes)
make_hex_string (const guchar *data, int n_bytes)
{
static const char hex_digits[] = {
'0', '1', '2', '3', '4', '5', '6', '7',
@ -729,12 +712,12 @@ elf_parser_get_build_id (ElfParser *parser)
offset += GET_SIZE (parser, Nhdr);
name = parser->data + offset;
name = (char *)(parser->data + offset);
if (strncmp (name, ELF_NOTE_GNU, name_size) != 0 || type != NT_GNU_BUILD_ID)
return NULL;
offset += strlen (parser->data + offset);
offset += strlen (name);
offset = (offset + 3) & (~0x3);
@ -757,7 +740,7 @@ elf_parser_get_debug_link (ElfParser *parser, guint32 *crc32)
offset = debug_link->offset;
result = parser->data + offset;
result = (char *)(parser->data + offset);
if (crc32)
{
@ -828,165 +811,3 @@ elf_parser_get_sym_address (ElfParser *parser,
/*
* Utility functions
*/
/* Check the ELF identification bytes at the start of 'data'.
 *
 * Returns FALSE when fewer than EI_NIDENT bytes are available, when the
 * data does not begin with the ELF magic number, or when the class or
 * data-encoding byte holds an unknown value. On success TRUE is returned
 * and, for each of 'is_64' and 'is_be' that is not NULL, the pointed-to
 * flag is set to whether the file is 64-bit, respectively big-endian.
 */
static gboolean
parse_elf_signature (const guchar *data,
                     gsize         length,
                     gboolean     *is_64,
                     gboolean     *is_be)
{
    /* FIXME: this function should be able to return an error */
    if (length < EI_NIDENT)
    {
        /* FIXME set error */
        return FALSE;
    }

    /* Without the magic number this is not an ELF file at all, however
     * plausible the class and encoding bytes may look.
     */
    if (memcmp (data, ELFMAG, SELFMAG) != 0)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_CLASS] != ELFCLASS32 &&
        data[EI_CLASS] != ELFCLASS64)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (data[EI_DATA] != ELFDATA2LSB &&
        data[EI_DATA] != ELFDATA2MSB)
    {
        /* FIXME set error */
        return FALSE;
    }

    if (is_64)
        *is_64 = (data[EI_CLASS] == ELFCLASS64);

    if (is_be)
        *is_be = (data[EI_DATA] == ELFDATA2MSB);

    return TRUE;
}
/* Hand out the static field tables that describe the ELF file header,
 * section header, symbol and note layouts.
 *
 * 'is_64' selects between the 64-bit and 32-bit variants of the first
 * three tables; the note table is the same for both. Every table ends
 * with an entry whose name is empty, which is the terminator that
 * bin_parser_create_record() scans for.
 */
static void
get_formats (gboolean         is_64,
             const BinField **elf_header,
             const BinField **shn_entry,
             const BinField **sym_format,
             const BinField **note_format_out)
{
    static const BinField elf64_header[] = {
        { "e_ident",            BIN_UNINTERPRETED, EI_NIDENT },
        { "e_type",             BIN_UINT, 2 },
        { "e_machine",          BIN_UINT, 2 },
        { "e_version",          BIN_UINT, 4 },
        { "e_entry",            BIN_UINT, 8 },
        { "e_phoff",            BIN_UINT, 8 },
        { "e_shoff",            BIN_UINT, 8 },
        { "e_flags",            BIN_UINT, 4 },
        { "e_ehsize",           BIN_UINT, 2 },
        { "e_phentsize",        BIN_UINT, 2 },
        { "e_phnum",            BIN_UINT, 2 },
        { "e_shentsize",        BIN_UINT, 2 },
        { "e_shnum",            BIN_UINT, 2 },
        { "e_shstrndx",         BIN_UINT, 2 },
        { "" },
    };

    static const BinField elf32_header[] = {
        { "e_ident",            BIN_UNINTERPRETED, EI_NIDENT },
        { "e_type",             BIN_UINT, 2 },
        { "e_machine",          BIN_UINT, 2 },
        { "e_version",          BIN_UINT, 4 },
        { "e_entry",            BIN_UINT, 4 },
        { "e_phoff",            BIN_UINT, 4 },
        { "e_shoff",            BIN_UINT, 4 },
        { "e_flags",            BIN_UINT, 4 },
        { "e_ehsize",           BIN_UINT, 2 },
        { "e_phentsize",        BIN_UINT, 2 },
        { "e_phnum",            BIN_UINT, 2 },
        { "e_shentsize",        BIN_UINT, 2 },
        { "e_shnum",            BIN_UINT, 2 },
        { "e_shstrndx",         BIN_UINT, 2 },
        { "" },
    };

    static const BinField shn64_entry[] = {
        { "sh_name",            BIN_UINT, 4 },
        { "sh_type",            BIN_UINT, 4 },
        { "sh_flags",           BIN_UINT, 8 },
        { "sh_addr",            BIN_UINT, 8 },
        { "sh_offset",          BIN_UINT, 8 },
        { "sh_size",            BIN_UINT, 8 },
        { "sh_link",            BIN_UINT, 4 },
        { "sh_info",            BIN_UINT, 4 },
        { "sh_addralign",       BIN_UINT, 8 },
        { "sh_entsize",         BIN_UINT, 8 },
        { "" }
    };

    static const BinField shn32_entry[] = {
        { "sh_name",            BIN_UINT, 4 },
        { "sh_type",            BIN_UINT, 4 },
        { "sh_flags",           BIN_UINT, 4 },
        { "sh_addr",            BIN_UINT, 4 },
        { "sh_offset",          BIN_UINT, 4 },
        { "sh_size",            BIN_UINT, 4 },
        { "sh_link",            BIN_UINT, 4 },
        { "sh_info",            BIN_UINT, 4 },
        { "sh_addralign",       BIN_UINT, 4 },
        { "sh_entsize",         BIN_UINT, 4 },
        { "" }
    };

    static const BinField sym64_format[] = {
        { "st_name",            BIN_UINT, 4 },
        { "st_info",            BIN_UINT, 1 },
        { "st_other",           BIN_UINT, 1 },
        { "st_shndx",           BIN_UINT, 2 },
        { "st_value",           BIN_UINT, 8 },
        { "st_size",            BIN_UINT, 8 },
        { "" }
    };

    static const BinField sym32_format[] = {
        { "st_name",            BIN_UINT, 4 },
        { "st_value",           BIN_UINT, 4 },
        { "st_size",            BIN_UINT, 4 },
        { "st_info",            BIN_UINT, 1 },
        { "st_other",           BIN_UINT, 1 },
        { "st_shndx",           BIN_UINT, 2 },
        { "" },
    };

    static const BinField note_format[] = {
        { "name_size",          BIN_UINT, 4 },
        { "desc_size",          BIN_UINT, 4 },
        { "type",               BIN_UINT, 4 },
        /* Terminator: without it the field count would be taken from
         * whatever memory happens to follow this array.
         */
        { "" },
    };

    if (is_64)
    {
        *elf_header = elf64_header;
        *shn_entry  = shn64_entry;
        *sym_format = sym64_format;
    }
    else
    {
        *elf_header = elf32_header;
        *shn_entry  = shn32_entry;
        *sym_format = sym32_format;
    }

    *note_format_out = note_format;
}
/* Create the header, section-header, symbol and note record layouts for
 * 'parser', using the 64-bit or 32-bit field tables according to 'is_64'.
 * The records are owned by the underlying BinParser.
 */
static void
make_formats (ElfParser *parser, gboolean is_64)
{
    const BinField *header_fields;
    const BinField *section_fields;
    const BinField *symbol_fields;
    const BinField *note_fields;

    get_formats (is_64,
                 &header_fields, &section_fields,
                 &symbol_fields, &note_fields);

    parser->header =
        bin_parser_create_record (parser->parser, header_fields);
    parser->shn_entry =
        bin_parser_create_record (parser->parser, section_fields);
    parser->sym_format =
        bin_parser_create_record (parser->parser, symbol_fields);
    parser->note_format =
        bin_parser_create_record (parser->parser, note_fields);
}

View File

@ -1,5 +1,4 @@
#include "elfparser.h"
#include "binparser.h"
#include <string.h>
/* Pointer encodings, from dwarf2.h. */