/*
 * © 2018 Aaron Taylor
 * See LICENSE.txt file for copyright and license details.
 *
 * Some parts of this file were imported from other projects.
 * See `ned/misc/licenses/` and git history for details.
 */

#include <stdint.h>

#ifndef NED_A_OUT_H
#define NED_A_OUT_H

/*
 * A binary file consists of up to 7 sections. In order, these sections are:
 *
 *      exec header
 *
 *          Contains parameters used by the kernel to load a binary file into
 *          memory and execute it, and by the link editor to combine a binary
 *          file with other binary files. This section is the only mandatory
 *          one.
 *
 *      text segment
 *
 *          Contains machine code and related data that are loaded into memory
 *          when a program executes. May be loaded read-only.
 *
 *      data segment
 *
 *          Contains initialized data; always loaded into writable memory.
 *
 *      text relocations
 *
 *          Contains records used by the link editor to update pointers in the
 *          text segment when combining binary files.
 *
 *      data relocations
 *
 *          Like the text relocation section, but for data segment pointers.
 *
 *      symbol table
 *
 *          Contains records used by the link editor to cross reference the
 *          addresses of named variables and functions (`symbols') between
 *          binary files.
 *
 *      string table
 *
 *          Contains the character strings corresponding to the symbol names.
 *
 * Every binary file begins with an exec structure:
 */

struct exec {
    uint32_t a_midmag;  /* flags<<26 | mid<<16 | magic */
    uint32_t a_text;    /* text segment size           */
    uint32_t a_data;    /* initialized data size       */
    uint32_t a_bss;     /* uninitialized data size     */
    uint32_t a_syms;    /* symbol table size           */
    uint32_t a_entry;   /* entry point                 */
    uint32_t a_trsize;  /* text relocation size        */
    uint32_t a_drsize;  /* data relocation size        */
};

/*
 * The exec fields have the following functions:
 *
 *      a_midmag
 *
 *          This field is stored in host byte-order. It has a number of
 *          sub-components accessed by the macros N_GETFLAG(), N_GETMID(), and
 *          N_GETMAGIC(), and set by the macro N_SETMAGIC().
 *
 *          The macro N_GETFLAG() returns a few flags:
 *
 *              EX_DYNAMIC
 *
 *                  indicates that the executable requires the services of the
 *                  run-time link editor.
 *
 *              EX_PIC
 *
 *                  indicates that the object contains position independent code.
 *
 *          If both EX_DYNAMIC and EX_PIC are set, the object file is a position
 *          independent executable image (e.g. a shared library), which is to be
 *          loaded into the process address space by the run-time link editor.
 *
 *          The macro N_GETMID() returns the machine-id. This indicates which
 *          machine(s) the binary is intended to run on.
 *
 *          N_GETMAGIC() specifies the magic number, which uniquely identifies
 *          binary files and distinguishes different loading conventions. The field
 *          must contain one of the following values:
 *
 *              NED_MAGIC1
 *
 *                  The text and data segments immediately follow the header and
 *                  are contiguous. Both text and data segments are loaded into
 *                  writable memory.
 *
 *      a_text
 *
 *          Contains the size of the text segment in bytes.
 *
 *      a_data
 *
 *          Contains the size of the data segment in bytes.
 *
 *      a_bss
 *
 *          Contains the size of the bss segment in bytes.
 *
 *      a_syms
 *
 *          Contains the size of the symbol table segment in bytes.
 *
 *      a_entry
 *
 *          Contains the address in memory of the entry point of the program.
 *
 *      a_trsize
 *
 *          Contains the size in bytes of the text relocation table.
 *
 *      a_drsize
 *
 *          Contains the size in bytes of the data relocation table.
 *
 * This include file defines several macros which use an exec
 * structure to test consistency or to locate section offsets in the binary
 * file.
 *
 *      N_BADMAG(exec)
 *
 *          Nonzero if the magic number stored in the a_midmag field is not a
 *          recognized value.
 *
 *      N_TXTOFF(exec)
 *
 *          The byte offset in the binary file of the beginning of the text
 *          segment.
 *
 *      N_DATOFF(exec)
 *
 *          The byte offset of the beginning of the data segment.
 *
 *      N_RELOFF(exec)
 *
 *          The byte offset of the beginning of the relocation records (text
 *          relocations first, then data relocations).
 *
 *      N_SYMOFF(exec)
 *
 *          The byte offset of the beginning of the symbol table.
 *
 *      N_STROFF(exec)
 *
 *          The byte offset of the beginning of the string table.
 */

#define N_GETMAGIC(ex)  ((ex).a_midmag & 0xffff)
#define N_GETMID(ex)    (((ex).a_midmag >> 16) & 0x03ff)
#define N_GETFLAG(ex)   (((ex).a_midmag >> 26) & 0x3f)

/*
 * The operands of N_SETMAGIC() are converted to uint32_t before shifting:
 * a flag value with bit 5 set (e.g. EX_DYNAMIC) shifted left by 26 does not
 * fit in a signed int, and overflowing a signed left shift is undefined
 * behavior.
 */
#define N_SETMAGIC(ex,mag,mid,flag) \
    ((ex).a_midmag = (((uint32_t)(flag) & 0x3f) << 26) | \
                     (((uint32_t)(mid) & 0x03ff) << 16) | \
                     ((uint32_t)(mag) & 0xffff))

#define N_BADMAG(ex)    (N_GETMAGIC(ex) != NED_MAGIC1)

/* Each section offset is the previous section's offset plus its size. */
#define N_TXTOFF(ex)    (sizeof(struct exec))
#define N_DATOFF(ex)    (N_TXTOFF(ex) + (ex).a_text)
#define N_RELOFF(ex)    (N_DATOFF(ex) + (ex).a_data)
#define N_SYMOFF(ex)    (N_RELOFF(ex) + (ex).a_trsize + (ex).a_drsize)
#define N_STROFF(ex)    (N_SYMOFF(ex) + (ex).a_syms)

/* There doesn't appear to be any pattern to magic number assignments. */
/* See: /usr/src/contrib/file/magic/Magdir/aout */
#define NED_MAGIC1      0x107

/* There doesn't seem to be any pattern to Machine ID number assignments. */
/* For now, I'm using the sum of the ASCII values for "NED". */
#define MID_NED         0xD7    /* NED binary */

#define EX_PIC          0x10    /* contains position independent code */
#define EX_DYNAMIC      0x20    /* contains run-time link-edit info */
#define EX_DPMASK       0x30    /* mask for the above */

/*
 * Relocation records have a standard format which is described by the
 * relocation_info structure:
 */

struct relocation_info {
    uint32_t r_address;         /* offset in text or data segment    */
    uint32_t r_symbolnum : 24,  /* ordinal number of add symbol      */
             r_pcrel     :  1,  /* 1 if value should be pc-relative  */
             r_length    :  2,  /* log base 2 of value's width       */
             r_extern    :  1,  /* 1 if need to add symbol to value  */
             r_baserel   :  1,  /* linkage table relative            */
             r_jmptable  :  1,  /* relocate to jump table            */
             r_relative  :  1,  /* load address relative             */
             r_copy      :  1;  /* run time copy                     */
};

/*
 * The relocation_info fields are used as follows:
 *
 *      r_address
 *
 *          Contains the byte offset of a pointer that needs to be link-edited.
 *          Text relocation offsets are reckoned from the start of the text
 *          segment, and data relocation offsets from the start of the data
 *          segment. The link editor adds the value that is already stored at this
 *          offset into the new value that it computes using this relocation
 *          record.
 *
 *      r_symbolnum
 *
 *          Contains the ordinal number of a symbol structure in the symbol table
 *          (it is not a byte offset). After the link editor resolves the absolute
 *          address for this symbol, it adds that address to the pointer that is
 *          undergoing relocation. (If the r_extern bit is clear, the situation is
 *          different; see below.)
 *
 *      r_pcrel
 *
 *          If this is set, the link editor assumes that it is updating a pointer
 *          that is part of a machine code instruction using pc-relative
 *          addressing. The address of the relocated pointer is implicitly added to
 *          its value when the running program uses it.
 *
 *      r_length
 *
 *          Contains the log base2 of the length of the pointer in bytes; 0 for
 *          1-byte displacements, 1 for 2-byte displacements, 2 for 4-byte
 *          displacements.
 *
 *      r_extern
 *
 *          Set if this relocation requires an external reference; the link editor
 *          must use a symbol address to update the pointer. When the r_extern bit
 *          is clear, the relocation is `local'; the link editor updates the
 *          pointer to reflect changes in the load addresses of the various
 *          segments, rather than changes in the value of a symbol (except when
 *          r_baserel is also set; see below). In this case, the content of the
 *          r_symbolnum field is an n_type value (see below); this type field tells
 *          the link editor what segment the relocated pointer points into.
 *
 *      r_baserel
 *
 *          If set, the symbol, as identified by the r_symbolnum field, is to be
 *          relocated to an offset into the Global Offset Table. At runtime, the
 *          entry in the Global Offset Table at this offset is set to be the
 *          address of the symbol.
 *
 *      r_jmptable
 *
 *          If set, the symbol, as identified by the r_symbolnum field, is to be
 *          relocated to an offset into the Procedure Linkage Table.
 *
 *      r_relative
 *
 *          If set, this relocation is relative to the (run-time) load address of
 *          the image this object file is going to be a part of. This type of
 *          relocation only occurs in shared objects.
 *
 *      r_copy
 *
 *          If set, this relocation record identifies a symbol whose contents
 *          should be copied to the location given in r_address. The copying is
 *          done by the runtime link-editor from a suitable data item in a shared
 *          object.
 *
 * Symbols map names to addresses (or more generally, strings to values).
 * Since the link-editor adjusts addresses, a symbol's name must be used to
 * stand for its address until an absolute value has been assigned. Symbols
 * consist of a fixed-length record in the symbol table and a variable-length
 * name in the string table. The symbol table is an array of nlist structures:
 *
 * NOTE(review): n_un is sized by its largest member, so on hosts whose
 * pointers are wider than 32 bits the in-memory struct is larger than five
 * 32-bit fields -- confirm how symbol records are read from and written to
 * binary files on such hosts.
 */

struct nlist {
    union {
        char *   n_name;
        uint32_t n_strx;
    } n_un;
    uint32_t n_type;
    uint32_t n_other;
    uint32_t n_desc;
    uint32_t n_value;
};

/*
 * The fields are used as follows:
 *
 *      n_un.n_strx
 *
 *          Contains a byte offset into the string table for the name of this
 *          symbol.
 *
 *      n_un.n_name
 *
 *          Used by the runtime link editor. Contains a pointer to the string in
 *          memory.
 *
 *      n_type
 *
 *          Used by the link editor to determine how to update the symbol's value.
 *          The n_type field is broken down into three sub-fields using bitmasks.
 *          The link editor treats symbols with the N_EXT type bit set as
 *          `external' symbols and permits references to them from other binary
 *          files. The N_TYPE mask selects bits of interest to the link editor:
 *
 *              N_UNDF
 *
 *                  An undefined symbol. The link editor must locate an external
 *                  symbol with the same name in another binary file to determine
 *                  the absolute value of this symbol. As a special case, if the
 *                  n_value field is nonzero and no binary file in the link-edit
 *                  defines this symbol, the link-editor will resolve this symbol
 *                  to an address in the bss segment, reserving an amount of bytes
 *                  equal to n_value. If this symbol is undefined in more than one
 *                  binary file and the binary files do not agree on the size, the
 *                  link editor chooses the greatest size found across all
 *                  binaries.
 *
 *              N_ABS
 *
 *                  An absolute symbol. The link editor does not update an absolute
 *                  symbol.
 *
 *              N_TEXT
 *
 *                  A text symbol. This symbol's value is a text address and the
 *                  link editor will update it when it merges binary files.
 *
 *              N_DATA
 *
 *                  A data symbol; similar to N_TEXT but for data addresses. The
 *                  values for text and data symbols are not file offsets but
 *                  addresses; to recover the file offsets, it is necessary to
 *                  identify the loaded address of the beginning of the
 *                  corresponding section and subtract it, then add the offset of
 *                  the section.
 *
 *              N_BSS
 *
 *                  A bss symbol; like text or data symbols but has no
 *                  corresponding offset in the binary file.
 *
 *          The N_STAB mask selects bits of interest to symbolic debuggers.
 *
 *      n_other
 *
 *          This field provides information on the nature of the symbol independent
 *          of the symbol's location in terms of segments as determined by the
 *          n_type field. Currently, the lower 4 bits of the n_other field hold one
 *          of two values: AUX_FUNC and AUX_OBJECT. AUX_FUNC associates the symbol
 *          with a callable function, while AUX_OBJECT associates the symbol with
 *          data, irrespective of their locations in either the text or the data
 *          segment.
 *
 *      n_desc
 *
 *          Reserved for use by debuggers; passed untouched by the link editor.
 *          Different debuggers use this field for different purposes.
 *
 *      n_value
 *
 *          Contains the value of the symbol. For text, data and bss symbols, this
 *          is an address; for other symbols (such as debugger symbols), the value
 *          may be arbitrary.
 *
 * The string table consists of a 32-bit length followed by null-terminated
 * symbol strings. The length represents the size of the entire table in bytes,
 * so its minimum value (or the offset of the first string) is always 4 on
 * 32-bit machines.
 */

/* Used in nlist.n_type. */
#define N_UNDF      0x00        /* undefined */
#define N_ABS       0x02        /* absolute address */
#define N_TEXT      0x04        /* text segment */
#define N_DATA      0x08        /* data segment */
#define N_BSS       0x10        /* bss segment */

/*
 * NOTE(review): N_TYPE (0xff) also covers the N_EXT bit (0x01), so
 * `n_type & N_TYPE' does not strip the external flag -- confirm intended.
 */
#define N_EXT       0x01        /* external (global) bit, OR'ed in */
#define N_TYPE      0xff        /* mask for all the type bits */
#define N_STAB      0xffffff00  /* mask for debugger symbols -- stab(5) */

/* Used in nlist.n_other */
#define AUX_FUNC    1           /* Function */
#define AUX_OBJECT  2           /* Data */

#endif