Added 'objdump.c' for dumping the text segment of NED a.out files as a binary blob.
[ned1] / common / a.out.h
/*
* © 2018 Aaron Taylor <ataylor at subgeniuskitty dot com>
* See LICENSE.txt file for copyright and license details.
*
* Some parts of this file were imported from other projects.
* See `ned/misc/licenses/` and git history for details.
*/
#include <stdint.h>
#ifndef NED_A_OUT_H
#define NED_A_OUT_H
/*
* A binary file consists of up to 7 sections. In order, these sections are:
*
* exec header
*
* Contains parameters used by the kernel to load a binary file into
* memory and execute it, and by the link editor to combine a binary
* file with other binary files. This section is the only mandatory
* one.
*
* text segment
*
* Contains machine code and related data that are loaded into memory
* when a program executes. May be loaded read-only.
*
* data segment
*
* Contains initialized data; always loaded into writable memory.
*
* text relocations
*
* Contains records used by the link editor to update pointers in the
* text segment when combining binary files.
*
* data relocations
*
* Like the text relocation section, but for data segment pointers.
*
* symbol table
*
* Contains records used by the link editor to cross reference the
* addresses of named variables and functions (`symbols') between
* binary files.
*
* string table
*
* Contains the character strings corresponding to the symbol names.
*
* Every binary file begins with an exec structure:
*/
struct exec {
    /* Packed identification word: flags << 26 | machine id << 16 | magic. */
    uint32_t a_midmag;

    /* Segment and table sizes, all expressed in bytes. */
    uint32_t a_text;    /* text segment */
    uint32_t a_data;    /* initialized data segment */
    uint32_t a_bss;     /* uninitialized data (bss) segment */
    uint32_t a_syms;    /* symbol table */

    /* Address in memory of the program's entry point. */
    uint32_t a_entry;

    /* Relocation table sizes, in bytes. */
    uint32_t a_trsize;  /* text relocations */
    uint32_t a_drsize;  /* data relocations */
};
/*
* The exec fields have the following functions:
*
* a_midmag
*
* This field is stored in host byte-order. It has a number of
* sub-components accessed by the macros N_GETFLAG(), N_GETMID(), and
* N_GETMAGIC(), and set by the macro N_SETMAGIC().
*
* The macro N_GETFLAG() returns a few flags:
*
* EX_DYNAMIC
*
* indicates that the executable requires the services of the
* run-time link editor.
*
* EX_PIC
*
* indicates that the object contains position independent code.
*
* If both EX_DYNAMIC and EX_PIC are set, the object file is a position
* independent executable image (e.g. a shared library), which is to be
* loaded into the process address space by the run-time link editor.
*
* The macro N_GETMID() returns the machine-id. This indicates which
* machine(s) the binary is intended to run on.
*
* N_GETMAGIC() specifies the magic number, which uniquely identifies
* binary files and distinguishes different loading conventions. The field
* must contain one of the following values:
*
* NED_MAGIC1
*
* The text and data segments immediately follow the header and
* are contiguous. Both text and data segments are loaded into
* writable memory.
*
* a_text
*
* Contains the size of the text segment in bytes.
*
* a_data
*
* Contains the size of the data segment in bytes.
*
* a_bss
*
* Contains the size of the bss segment in bytes.
*
* a_syms
*
* Contains the size of the symbol table segment in bytes.
*
* a_entry
*
* Contains the address in memory of the entry point of the program.
*
* a_trsize
*
* Contains the size in bytes of the text relocation table.
*
* a_drsize
*
* Contains the size in bytes of the data relocation table.
*
* The <a.out.h> include file defines several macros which use an exec
* structure to test consistency or to locate section offsets in the binary
* file.
*
* N_BADMAG(exec)
*
* Nonzero if the magic number stored in a_midmag (see N_GETMAGIC()) is
* not a recognized value.
*
* N_TXTOFF(exec)
*
* The byte offset in the binary file of the beginning of the text
* segment.
*
* N_SYMOFF(exec)
*
* The byte offset of the beginning of the symbol table.
*
* N_STROFF(exec)
*
* The byte offset of the beginning of the string table.
*/
/*
 * Accessors for the packed a_midmag word. From the low bit upward the word
 * holds: bits 0-15 magic number, bits 16-25 machine id, bits 26-31 flags.
 * Every macro below takes a struct exec object (not a pointer to one).
 */
#define N_GETMAGIC(ex) ((ex).a_midmag & 0xffff)
#define N_GETMID(ex) (((ex).a_midmag >> 16) & 0x03ff)
#define N_GETFLAG(ex) (((ex).a_midmag >> 26) & 0x3f)
/*
 * Pack flag, mid and mag into (ex).a_midmag. Bits of an argument that do not
 * fit its sub-field are masked off.
 *
 * Each operand is converted to uint32_t before it is shifted. With plain int
 * operands, a flag value with bit 5 set (such as 0x20) shifted left by 26
 * produces 2^31, which a 32-bit int cannot represent; that left shift is
 * undefined behavior. Shifting an unsigned 32-bit value is well defined and
 * stores the same bit pattern.
 */
#define N_SETMAGIC(ex,mag,mid,flag) \
    ((ex).a_midmag = (((uint32_t)(flag) & 0x3f) << 26) | \
                     (((uint32_t)(mid) & 0x03ff) << 16) | \
                     ((uint32_t)(mag) & 0xffff))
/* Nonzero when the magic number in a_midmag is not NED_MAGIC1. */
#define N_BADMAG(ex) (N_GETMAGIC(ex) != NED_MAGIC1)
/*
 * Byte offsets of each section from the start of the file. Each offset is the
 * previous section's offset plus that section's size from the header.
 */
#define N_TXTOFF(ex) (sizeof(struct exec))  /* text follows the header; ex is unused */
#define N_DATOFF(ex) (N_TXTOFF(ex) + (ex).a_text)  /* data segment */
#define N_RELOFF(ex) (N_DATOFF(ex) + (ex).a_data)  /* start of the relocation records */
#define N_SYMOFF(ex) (N_RELOFF(ex) + (ex).a_trsize + (ex).a_drsize)  /* symbol table */
#define N_STROFF(ex) (N_SYMOFF(ex) + (ex).a_syms)  /* string table */
/* There doesn't appear to be any pattern to magic number assignments. */
/* See: /usr/src/contrib/file/magic/Magdir/aout */
/* Magic number of a NED binary; N_BADMAG() compares the header against this value. */
#define NED_MAGIC1 0x107
/* There doesn't seem to be any pattern to Machine ID number assignments. */
/* For now, I'm using the sum of the ASCII values for "NED". */
#define MID_NED 0xD7 /* NED binary */
#define EX_PIC 0x10 /* contains position independent code */
#define EX_DYNAMIC 0x20 /* contains run-time link-edit info */
#define EX_DPMASK 0x30 /* mask for the above */
/*
* Relocation records have a standard format which is described by the
* relocation_info structure:
*/
struct relocation_info {
    /* Byte offset, within the text or data segment, of the value to patch. */
    uint32_t r_address;

    /* The bit-fields below add up to 32 bits (24 + 1 + 2 + 1 + 1 + 1 + 1 + 1). */
    uint32_t r_symbolnum : 24;  /* ordinal number of the symbol to add */
    uint32_t r_pcrel     : 1;   /* 1 if the value should be pc-relative */
    uint32_t r_length    : 2;   /* log base 2 of the value's width in bytes */
    uint32_t r_extern    : 1;   /* 1 if a symbol must be added to the value */
    uint32_t r_baserel   : 1;   /* linkage table relative */
    uint32_t r_jmptable  : 1;   /* relocate to jump table */
    uint32_t r_relative  : 1;   /* load address relative */
    uint32_t r_copy      : 1;   /* run time copy */
};
/*
* The relocation_info fields are used as follows:
*
* r_address
*
* Contains the byte offset of a pointer that needs to be link-edited.
* Text relocation offsets are reckoned from the start of the text
* segment, and data relocation offsets from the start of the data
* segment. The link editor adds the value that is already stored at this
* offset into the new value that it computes using this relocation
* record.
*
* r_symbolnum
*
* Contains the ordinal number of a symbol structure in the symbol table
* (it is not a byte offset). After the link editor resolves the absolute
* address for this symbol, it adds that address to the pointer that is
* undergoing relocation. (If the r_extern bit is clear, the situation is
* different; see below.)
*
* r_pcrel
*
* If this is set, the link editor assumes that it is updating a pointer
* that is part of a machine code instruction using pc-relative
* addressing. The address of the relocated pointer is implicitly added to
* its value when the running program uses it.
*
* r_length
*
* Contains the base-2 logarithm of the length of the pointer in bytes; 0 for
* 1-byte displacements, 1 for 2-byte displacements, 2 for 4-byte
* displacements.
*
* r_extern
*
* Set if this relocation requires an external reference; the link editor
* must use a symbol address to update the pointer. When the r_extern bit
* is clear, the relocation is `local'; the link editor updates the
* pointer to reflect changes in the load addresses of the various
* segments, rather than changes in the value of a symbol (except when
* r_baserel is also set; see below). In this case, the content of the
* r_symbolnum field is an n_type value (see below); this type field tells
* the link editor what segment the relocated pointer points into.
*
* r_baserel
*
* If set, the symbol, as identified by the r_symbolnum field, is to be
* relocated to an offset into the Global Offset Table. At runtime, the
* entry in the Global Offset Table at this offset is set to be the
* address of the symbol.
*
* r_jmptable
*
* If set, the symbol, as identified by the r_symbolnum field, is to be
* relocated to an offset into the Procedure Linkage Table.
*
* r_relative
*
* If set, this relocation is relative to the (run-time) load address of
* the image this object file is going to be a part of. This type of
* relocation only occurs in shared objects.
*
* r_copy
*
* If set, this relocation record identifies a symbol whose contents
* should be copied to the location given in r_address. The copying is
* done by the runtime link-editor from a suitable data item in a shared
* object.
*
* Symbols map names to addresses (or more generally, strings to values).
* Since the link-editor adjusts addresses, a symbol's name must be used to
* stand for its address until an absolute value has been assigned. Symbols
* consist of a fixed-length record in the symbol table and a variable-length
* name in the string table. The symbol table is an array of nlist structures:
*/
struct nlist {
    /*
     * Name of the symbol, in one of two forms.
     * NOTE(review): this union is as wide as a `char *`, so on hosts whose
     * pointers are wider than 32 bits the record is larger than five 32-bit
     * words -- confirm this matches what the tools read and write.
     */
    union {
        char *n_name;       /* pointer to the name string in memory */
        uint32_t n_strx;    /* byte offset of the name in the string table */
    } n_un;

    uint32_t n_type;    /* type bits (N_UNDF, N_ABS, ..., N_EXT) */
    uint32_t n_other;   /* AUX_FUNC or AUX_OBJECT in the lower 4 bits */
    uint32_t n_desc;    /* reserved for debuggers */
    uint32_t n_value;   /* symbol value; an address for text, data and bss */
};
/*
* The fields are used as follows:
*
* n_un.n_strx
*
* Contains a byte offset into the string table for the name of this
* symbol.
*
* n_un.n_name
*
* Used by the runtime link editor. Contains a pointer to the string in
* memory.
*
* n_type
*
* Used by the link editor to determine how to update the symbol's value.
* The n_type field is broken down into three sub-fields using bitmasks.
* The link editor treats symbols with the N_EXT type bit set as
* `external' symbols and permits references to them from other binary
* files. The N_TYPE mask selects bits of interest to the link editor:
*
* N_UNDF
*
* An undefined symbol. The link editor must locate an external
* symbol with the same name in another binary file to determine
* the absolute value of this symbol. As a special case, if the
* n_value field is nonzero and no binary file in the link-edit
* defines this symbol, the link-editor will resolve this symbol
* to an address in the bss segment, reserving an amount of bytes
* equal to n_value. If this symbol is undefined in more than one
* binary file and the binary files do not agree on the size, the
* link editor chooses the greatest size found across all
* binaries.
*
* N_ABS
*
* An absolute symbol. The link editor does not update an absolute
* symbol.
*
* N_TEXT
*
* A text symbol. This symbol's value is a text address and the
* link editor will update it when it merges binary files.
*
* N_DATA
*
* A data symbol; similar to N_TEXT but for data addresses. The
* values for text and data symbols are not file offsets but
* addresses; to recover the file offsets, it is necessary to
* identify the loaded address of the beginning of the
* corresponding section and subtract it, then add the offset of
* the section.
*
* N_BSS
*
* A bss symbol; like text or data symbols but has no
* corresponding offset in the binary file.
*
* The N_STAB mask selects bits of interest to symbolic debuggers.
*
* n_other
*
* This field provides information on the nature of the symbol independent
* of the symbol's location in terms of segments as determined by the
* n_type field. Currently, the lower 4 bits of the n_other field hold one
* of two values: AUX_FUNC and AUX_OBJECT. AUX_FUNC associates the symbol
* with a callable function, while AUX_OBJECT associates the symbol with
* data, irrespective of their locations in either the text or the data
* segment.
*
* n_desc
*
* Reserved for use by debuggers; passed untouched by the link editor.
* Different debuggers use this field for different purposes.
*
* n_value
*
* Contains the value of the symbol. For text, data and bss symbols, this
* is an address; for other symbols (such as debugger symbols), the value
* may be arbitrary.
*
* The string table consists of a 32-bit length followed by null-terminated
* symbol strings. The length represents the size of the entire table in bytes,
* so its minimum value (or the offset of the first string) is always 4 on
* 32-bit machines.
*/
/* Used in nlist.n_type. */
/* Segment/type values; N_EXT may be OR'ed onto any of them. */
#define N_UNDF 0x00 /* undefined */
#define N_ABS 0x02 /* absolute address */
#define N_TEXT 0x04 /* text segment */
#define N_DATA 0x08 /* data segment */
#define N_BSS 0x10 /* bss segment */
#define N_EXT 0x01 /* external (global) bit, OR'ed in */
/*
 * N_TYPE covers the low 8 bits of n_type, which includes the N_EXT bit, so
 * (n_type & N_TYPE) still carries N_EXT. N_STAB covers the upper 24 bits.
 */
#define N_TYPE 0xff /* mask for all the type bits */
#define N_STAB 0xffffff00 /* mask for debugger symbols -- stab(5) */
/* Used in nlist.n_other */
/* One of these values is held in the lower 4 bits of n_other. */
#define AUX_FUNC 1 /* Function: symbol is a callable function */
#define AUX_OBJECT 2 /* Data: symbol is a data object */
#endif