+/*
+ * © 2018 Aaron Taylor <ataylor at subgeniuskitty dot com>
+ * See LICENSE.txt file for copyright and license details.
+ *
+ * Some parts of this file were imported from other projects.
+ * See `ned/misc/licenses/` and git history for details.
+ */
+
+#include <stdint.h>
+
+#ifndef NED_A_OUT_H
+#define NED_A_OUT_H
+
+/*
+ * A binary file consists of up to 7 sections. In order, these sections are:
+ *
+ * exec header
+ *
+ * Contains parameters used by the kernel to load a binary file into
+ * memory and execute it, and by the link editor to combine a binary
+ * file with other binary files. This section is the only mandatory
+ * one.
+ *
+ * text segment
+ *
+ * Contains machine code and related data that are loaded into memory
+ * when a program executes. May be loaded read-only.
+ *
+ * data segment
+ *
+ * Contains initialized data; always loaded into writable memory.
+ *
+ * text relocations
+ *
+ * Contains records used by the link editor to update pointers in the
+ * text segment when combining binary files.
+ *
+ * data relocations
+ *
+ * Like the text relocation section, but for data segment pointers.
+ *
+ * symbol table
+ *
+ * Contains records used by the link editor to cross reference the
+ * addresses of named variables and functions (`symbols') between
+ * binary files.
+ *
+ * string table
+ *
+ * Contains the character strings corresponding to the symbol names.
+ *
+ * Every binary file begins with an exec structure:
+ */
+
+struct exec { /* file header found at the start of every binary: eight 32-bit fields */
+ uint32_t a_midmag; /* packed word: flags<<26 | mid<<16 | magic */
+ uint32_t a_text; /* text segment size, in bytes */
+ uint32_t a_data; /* initialized data segment size, in bytes */
+ uint32_t a_bss; /* uninitialized data (bss) size, in bytes */
+ uint32_t a_syms; /* symbol table size, in bytes */
+ uint32_t a_entry; /* address in memory of the program entry point */
+ uint32_t a_trsize; /* text relocation table size, in bytes */
+ uint32_t a_drsize; /* data relocation table size, in bytes */
+};
+
+/*
+ * The exec fields have the following functions:
+ *
+ * a_midmag
+ *
+ * This field is stored in host byte-order. It has a number of
+ * sub-components accessed by the macros N_GETFLAG(), N_GETMID(), and
+ * N_GETMAGIC(), and set by the macro N_SETMAGIC().
+ *
+ * The macro N_GETFLAG() returns a few flags:
+ *
+ * EX_DYNAMIC
+ *
+ * indicates that the executable requires the services of the
+ * run-time link editor.
+ *
+ * EX_PIC
+ *
+ * indicates that the object contains position independent code.
+ *
+ * If both EX_DYNAMIC and EX_PIC are set, the object file is a position
+ * independent executable image (e.g. a shared library), which is to be
+ * loaded into the process address space by the run-time link editor.
+ *
+ * The macro N_GETMID() returns the machine-id. This indicates which
+ * machine(s) the binary is intended to run on.
+ *
+ * N_GETMAGIC() specifies the magic number, which uniquely identifies
+ * binary files and distinguishes different loading conventions. The field
+ * must contain one of the following values:
+ *
+ * NED_MAGIC1
+ *
+ * The text and data segments immediately follow the header and
+ * are contiguous. Both text and data segments are loaded into
+ * writable memory.
+ *
+ * a_text
+ *
+ * Contains the size of the text segment in bytes.
+ *
+ * a_data
+ *
+ * Contains the size of the data segment in bytes.
+ *
+ * a_bss
+ *
+ * Contains the size of the bss segment in bytes.
+ *
+ * a_syms
+ *
+ * Contains the size of the symbol table segment in bytes.
+ *
+ * a_entry
+ *
+ * Contains the address in memory of the entry point of the program.
+ *
+ * a_trsize
+ *
+ * Contains the size in bytes of the text relocation table.
+ *
+ * a_drsize
+ *
+ * Contains the size in bytes of the data relocation table.
+ *
+ * The <a.out.h> include file defines several macros which use an exec
+ * structure to test consistency or to locate section offsets in the binary
+ * file.
+ *
+ * N_BADMAG(exec)
+ *
+ * Nonzero if the magic number in a_midmag is not a recognized value.
+ *
+ * N_TXTOFF(exec)
+ *
+ * The byte offset in the binary file of the beginning of the text
+ * segment.
+ *
+ * N_SYMOFF(exec)
+ *
+ * The byte offset of the beginning of the symbol table.
+ *
+ * N_STROFF(exec)
+ *
+ * The byte offset of the beginning of the string table.
+ */
+
+/*
+ * Accessors for the packed a_midmag word of an exec structure:
+ *
+ *   bits  0-15   magic number
+ *   bits 16-25   machine ID
+ *   bits 26-31   flags
+ *
+ * `ex' is an exec structure (not a pointer to one). These are macros, so
+ * `ex' is expanded textually; avoid arguments with side effects.
+ */
+#define N_GETMAGIC(ex) ((ex).a_midmag & 0xffff)
+#define N_GETMID(ex) (((ex).a_midmag >> 16) & 0x03ff)
+#define N_GETFLAG(ex) (((ex).a_midmag >> 26) & 0x3f)
+
+/*
+ * Pack flag, mid and mag into a_midmag, masking each to its field width.
+ * Every operand is converted to uint32_t before it is shifted: with plain
+ * int arguments a flag with bit 5 set (e.g. EX_DYNAMIC) shifted left by 26
+ * lands in the sign bit, which is undefined behavior for a signed type.
+ */
+#define N_SETMAGIC(ex,mag,mid,flag) \
+ ((ex).a_midmag = (((uint32_t)(flag) & 0x3fu) << 26) | \
+ (((uint32_t)(mid) & 0x03ffu) << 16) | \
+ ((uint32_t)(mag) & 0xffffu))
+
+/* Nonzero unless the magic number is NED_MAGIC1, the only recognized value. */
+#define N_BADMAG(ex) (N_GETMAGIC(ex) != NED_MAGIC1)
+
+/*
+ * Byte offsets of each section from the start of the file. The sections
+ * are laid out back to back in this order: header, text, data, text
+ * relocations, data relocations, symbol table, string table.
+ */
+#define N_TXTOFF(ex) (sizeof(struct exec)) /* text segment; `ex' is not used */
+#define N_DATOFF(ex) (N_TXTOFF(ex) + (ex).a_text) /* data segment */
+#define N_RELOFF(ex) (N_DATOFF(ex) + (ex).a_data) /* text relocations; data relocations follow */
+#define N_SYMOFF(ex) (N_RELOFF(ex) + (ex).a_trsize + (ex).a_drsize) /* symbol table */
+#define N_STROFF(ex) (N_SYMOFF(ex) + (ex).a_syms) /* string table */
+
+/* There doesn't appear to be any pattern to magic number assignments. */
+/* See: /usr/src/contrib/file/magic/Magdir/aout */
+#define NED_MAGIC1 0x107 /* text and data contiguous after the header; both loaded writable */
+
+/* There doesn't seem to be any pattern to Machine ID number assignments. */
+/* For now, I'm using the sum of the ASCII values for "NED" (0x4E + 0x45 + 0x44). */
+#define MID_NED 0xD7 /* NED binary */
+
+#define EX_PIC 0x10 /* contains position independent code; compare against N_GETFLAG() */
+#define EX_DYNAMIC 0x20 /* contains run-time link-edit info; compare against N_GETFLAG() */
+#define EX_DPMASK 0x30 /* mask for the above: EX_PIC | EX_DYNAMIC */
+
+/*
+ * Relocation records have a standard format which is described by the
+ * relocation_info structure:
+ */
+
+struct relocation_info { /* one relocation record; the bit-fields below total 32 bits */
+ uint32_t r_address; /* byte offset in text or data segment of the pointer to fix up */
+ uint32_t r_symbolnum : 24, /* symbol table ordinal, or an n_type value when r_extern is clear */
+ r_pcrel : 1, /* 1 if value should be pc-relative */
+ r_length : 2, /* log base 2 of value's width in bytes */
+ r_extern : 1, /* 1 if need to add symbol to value */
+ r_baserel : 1, /* linkage table relative (Global Offset Table) */
+ r_jmptable : 1, /* relocate to jump table (Procedure Linkage Table) */
+ r_relative : 1, /* relative to the run-time load address */
+ r_copy : 1; /* run time copy */
+};
+
+/*
+ * The relocation_info fields are used as follows:
+ *
+ * r_address
+ *
+ * Contains the byte offset of a pointer that needs to be link-edited.
+ * Text relocation offsets are reckoned from the start of the text
+ * segment, and data relocation offsets from the start of the data
+ * segment. The link editor adds the value that is already stored at this
+ * offset into the new value that it computes using this relocation
+ * record.
+ *
+ * r_symbolnum
+ *
+ * Contains the ordinal number of a symbol structure in the symbol table
+ * (it is not a byte offset). After the link editor resolves the absolute
+ * address for this symbol, it adds that address to the pointer that is
+ * undergoing relocation. (If the r_extern bit is clear, the situation is
+ * different; see below.)
+ *
+ * r_pcrel
+ *
+ * If this is set, the link editor assumes that it is updating a pointer
+ * that is part of a machine code instruction using pc-relative
+ * addressing. The address of the relocated pointer is implicitly added to
+ * its value when the running program uses it.
+ *
+ * r_length
+ *
+ * Contains the log base2 of the length of the pointer in bytes; 0 for
+ * 1-byte displacements, 1 for 2-byte displacements, 2 for 4-byte
+ * displacements.
+ *
+ * r_extern
+ *
+ * Set if this relocation requires an external reference; the link editor
+ * must use a symbol address to update the pointer. When the r_extern bit
+ * is clear, the relocation is `local'; the link editor updates the
+ * pointer to reflect changes in the load addresses of the various
+ * segments, rather than changes in the value of a symbol (except when
+ * r_baserel is also set; see below). In this case, the content of the
+ * r_symbolnum field is an n_type value (see below); this type field tells
+ * the link editor what segment the relocated pointer points into.
+ *
+ * r_baserel
+ *
+ * If set, the symbol, as identified by the r_symbolnum field, is to be
+ * relocated to an offset into the Global Offset Table. At runtime, the
+ * entry in the Global Offset Table at this offset is set to be the
+ * address of the symbol.
+ *
+ * r_jmptable
+ *
+ * If set, the symbol, as identified by the r_symbolnum field, is to be
+ * relocated to an offset into the Procedure Linkage Table.
+ *
+ * r_relative
+ *
+ * If set, this relocation is relative to the (run-time) load address of
+ * the image this object file is going to be a part of. This type of
+ * relocation only occurs in shared objects.
+ *
+ * r_copy
+ *
+ * If set, this relocation record identifies a symbol whose contents
+ * should be copied to the location given in r_address. The copying is
+ * done by the runtime link-editor from a suitable data item in a shared
+ * object.
+ *
+ * Symbols map names to addresses (or more generally, strings to values).
+ * Since the link-editor adjusts addresses, a symbol's name must be used to
+ * stand for its address until an absolute value has been assigned. Symbols
+ * consist of a fixed-length record in the symbol table and a variable-length
+ * name in the string table. The symbol table is an array of nlist structures:
+ */
+
+struct nlist { /* one symbol table record */
+ union {
+ char * n_name; /* in-memory pointer to the name; used by the runtime link editor */
+ uint32_t n_strx; /* byte offset of the name within the string table */
+ } n_un; /* NOTE(review): wider than 32 bits where pointers are -- confirm the record size is not meant to be host-independent */
+ uint32_t n_type; /* N_* segment type bits, N_EXT, and N_STAB debugger bits */
+ uint32_t n_other; /* AUX_FUNC or AUX_OBJECT in the lower 4 bits */
+ uint32_t n_desc; /* reserved for debuggers; passed untouched by the link editor */
+ uint32_t n_value; /* symbol value; an address for text, data and bss symbols */
+};
+
+/*
+ * The fields are used as follows:
+ *
+ * n_un.n_strx
+ *
+ * Contains a byte offset into the string table for the name of this
+ * symbol.
+ *
+ * n_un.n_name
+ *
+ * Used by the runtime link editor. Contains a pointer to the string in
+ * memory.
+ *
+ * n_type
+ *
+ * Used by the link editor to determine how to update the symbol's value.
+ * The n_type field is broken down into three sub-fields using bitmasks.
+ * The link editor treats symbols with the N_EXT type bit set as
+ * `external' symbols and permits references to them from other binary
+ * files. The N_TYPE mask selects bits of interest to the link editor:
+ *
+ * N_UNDF
+ *
+ * An undefined symbol. The link editor must locate an external
+ * symbol with the same name in another binary file to determine
+ * the absolute value of this symbol. As a special case, if the
+ * n_value field is nonzero and no binary file in the link-edit
+ * defines this symbol, the link-editor will resolve this symbol
+ * to an address in the bss segment, reserving an amount of bytes
+ * equal to n_value. If this symbol is undefined in more than one
+ * binary file and the binary files do not agree on the size, the
+ * link editor chooses the greatest size found across all
+ * binaries.
+ *
+ * N_ABS
+ *
+ * An absolute symbol. The link editor does not update an absolute
+ * symbol.
+ *
+ * N_TEXT
+ *
+ * A text symbol. This symbol's value is a text address and the
+ * link editor will update it when it merges binary files.
+ *
+ * N_DATA
+ *
+ * A data symbol; similar to N_TEXT but for data addresses. The
+ * values for text and data symbols are not file offsets but
+ * addresses; to recover the file offsets, it is necessary to
+ * identify the loaded address of the beginning of the
+ * corresponding section and subtract it, then add the offset of
+ * the section.
+ *
+ * N_BSS
+ *
+ * A bss symbol; like text or data symbols but has no
+ * corresponding offset in the binary file.
+ *
+ * The N_STAB mask selects bits of interest to symbolic debuggers.
+ *
+ * n_other
+ *
+ * This field provides information on the nature of the symbol independent
+ * of the symbol's location in terms of segments as determined by the
+ * n_type field. Currently, the lower 4 bits of the n_other field hold one
+ * of two values: AUX_FUNC and AUX_OBJECT. AUX_FUNC associates the symbol
+ * with a callable function, while AUX_OBJECT associates the symbol with
+ * data, irrespective of their locations in either the text or the data
+ * segment.
+ *
+ * n_desc
+ *
+ * Reserved for use by debuggers; passed untouched by the link editor.
+ * Different debuggers use this field for different purposes.
+ *
+ * n_value
+ *
+ * Contains the value of the symbol. For text, data and bss symbols, this
+ * is an address; for other symbols (such as debugger symbols), the value
+ * may be arbitrary.
+ *
+ * The string table consists of a 32-bit length followed by null-terminated
+ * symbol strings. The length represents the size of the entire table in bytes,
+ * so its minimum value (or the offset of the first string) is always 4 on
+ * 32-bit machines.
+ */
+
+/* Used in nlist.n_type. Each segment type below is a distinct single bit. */
+#define N_UNDF 0x00 /* undefined: no type bit set */
+#define N_ABS 0x02 /* absolute address; not updated by the link editor */
+#define N_TEXT 0x04 /* text segment */
+#define N_DATA 0x08 /* data segment */
+#define N_BSS 0x10 /* bss segment */
+
+#define N_EXT 0x01 /* external (global) bit, OR'ed in */
+#define N_TYPE 0xff /* mask for all the type bits; NOTE(review): also covers the N_EXT bit -- confirm intended */
+#define N_STAB 0xffffff00 /* mask for debugger symbols -- stab(5); upper 24 bits of n_type */
+
+/* Used in nlist.n_other (lower 4 bits) */
+#define AUX_FUNC 1 /* Function */
+#define AUX_OBJECT 2 /* Data */
+
+#endif