From dc87d2445f5e13146058bd672ad8399fad9e73fb Mon Sep 17 00:00:00 2001 From: Aaron Taylor Date: Mon, 31 Dec 2018 01:44:20 -0800 Subject: [PATCH] Added a.out format for NED binaries to track symbols through to the disassembler. --- common/a.out.h | 385 ++++++++++++++++++++++++++++++++++++++++ docs/compat_matrix.md | 1 + misc/licenses/FreeBSD | 33 ++++ nedasm/nedasm.c | 7 +- nedasm/nedasm_codegen.c | 177 ++++++++++++++++-- neddis/neddis.c | 144 ++++++++++++++- nedsim/nedsim.c | 61 ++++++- 7 files changed, 773 insertions(+), 35 deletions(-) create mode 100644 common/a.out.h create mode 100644 misc/licenses/FreeBSD diff --git a/common/a.out.h b/common/a.out.h new file mode 100644 index 0000000..d3b721e --- /dev/null +++ b/common/a.out.h @@ -0,0 +1,385 @@ +/* + * © 2018 Aaron Taylor + * See LICENSE.txt file for copyright and license details. + * + * Some parts of this file were imported from other projects. + * See `ned/misc/licenses/` and git history for details. + */ + +#include + +#ifndef NED_A_OUT_H +#define NED_A_OUT_H + +/* + * A binary file consists of up to 7 sections. In order, these sections are: + * + * exec header + * + * Contains parameters used by the kernel to load a binary file into + * memory and execute it, and by the link editor to combine a binary + * file with other binary files. This section is the only mandatory + * one. + * + * text segment + * + * Contains machine code and related data that are loaded into memory + * when a program executes. May be loaded read-only. + * + * data segment + * + * Contains initialized data; always loaded into writable memory. + * + * text relocations + * + * Contains records used by the link editor to update pointers in the + * text segment when combining binary files. + * + * data relocations + * + * Like the text relocation section, but for data segment pointers. 
+ * + * symbol table + * + * Contains records used by the link editor to cross reference the + * addresses of named variables and functions (`symbols') between + * binary files. + * + * string table + * + * Contains the character strings corresponding to the symbol names. + * + * Every binary file begins with an exec structure: + */ + +struct exec { + uint32_t a_midmag; /* flags<<26 | mid<<16 | magic */ + uint32_t a_text; /* text segment size */ + uint32_t a_data; /* initialized data size */ + uint32_t a_bss; /* uninitialized data size */ + uint32_t a_syms; /* symbol table size */ + uint32_t a_entry; /* entry point */ + uint32_t a_trsize; /* text relocation size */ + uint32_t a_drsize; /* data relocation size */ +}; + +/* + * The exec fields have the following functions: + * + * a_midmag + * + * This field is stored in host byte-order. It has a number of + * sub-components accessed by the macros N_GETFLAG(), N_GETMID(), and + * N_GETMAGIC(), and set by the macro N_SETMAGIC(). + * + * The macro N_GETFLAG() returns a few flags: + * + * EX_DYNAMIC + * + * indicates that the executable requires the services of the + * run-time link editor. + * + * EX_PIC + * + * indicates that the object contains position independent code. + * + * If both EX_DYNAMIC and EX_PIC are set, the object file is a position + * independent executable image (e.g. a shared library), which is to be + * loaded into the process address space by the run-time link editor. + * + * The macro N_GETMID() returns the machine-id. This indicates which + * machine(s) the binary is intended to run on. + * + * N_GETMAGIC() specifies the magic number, which uniquely identifies + * binary files and distinguishes different loading conventions. The field + * must contain one of the following values: + * + * NED_MAGIC1 + * + * The text and data segments immediately follow the header and + * are contiguous. Both text and data segments are loaded into + * writable memory. 
+ * + * a_text + * + * Contains the size of the text segment in bytes. + * + * a_data + * + * Contains the size of the data segment in bytes. + * + * a_bss + * + * Contains the size of the bss segment in bytes. + * + * a_syms + * + * Contains the size of the symbol table segment in bytes. + * + * a_entry + * + * Contains the address in memory of the entry point of the program. + * + * a_trsize + * + * Contains the size in bytes of the text relocation table. + * + * a_drsize + * + * Contains the size in bytes of the data relocation table. + * + * This include file defines several macros which use an exec + * structure to test consistency or to locate section offsets in the binary + * file. + * + * N_BADMAG(exec) + * + * Nonzero if the magic number in the a_midmag field is not a recognized value. + * + * N_TXTOFF(exec) + * + * The byte offset in the binary file of the beginning of the text + * segment. + * + * N_SYMOFF(exec) + * + * The byte offset of the beginning of the symbol table. + * + * N_STROFF(exec) + * + * The byte offset of the beginning of the string table. + */ + +#define N_GETMAGIC(ex) ((ex).a_midmag & 0xffff) +#define N_GETMID(ex) (((ex).a_midmag >> 16) & 0x03ff) +#define N_GETFLAG(ex) (((ex).a_midmag >> 26) & 0x3f) +#define N_SETMAGIC(ex,mag,mid,flag) \ + ((ex).a_midmag = (((flag) & 0x3f) <<26) | (((mid) & 0x03ff) << 16) | ((mag) & 0xffff)) + +#define N_BADMAG(ex) (N_GETMAGIC(ex) != NED_MAGIC1) + +#define N_TXTOFF(ex) (sizeof(struct exec)) +#define N_DATOFF(ex) (N_TXTOFF(ex) + (ex).a_text) +#define N_RELOFF(ex) (N_DATOFF(ex) + (ex).a_data) +#define N_SYMOFF(ex) (N_RELOFF(ex) + (ex).a_trsize + (ex).a_drsize) +#define N_STROFF(ex) (N_SYMOFF(ex) + (ex).a_syms) + +/* There doesn't appear to be any pattern to magic number assignments. */ +/* See: /usr/src/contrib/file/magic/Magdir/aout */ +#define NED_MAGIC1 0x107 + +/* There doesn't seem to be any pattern to Machine ID number assignments. */ +/* For now, I'm using the sum of the ASCII values for "NED". 
*/ +#define MID_NED 0xD7 /* NED binary */ + +#define EX_PIC 0x10 /* contains position independent code */ +#define EX_DYNAMIC 0x20 /* contains run-time link-edit info */ +#define EX_DPMASK 0x30 /* mask for the above */ + +/* + * Relocation records have a standard format which is described by the + * relocation_info structure: + */ + +struct relocation_info { + uint32_t r_address; /* offset in text or data segment */ + uint32_t r_symbolnum : 24, /* ordinal number of add symbol */ + r_pcrel : 1, /* 1 if value should be pc-relative */ + r_length : 2, /* log base 2 of value's width */ + r_extern : 1, /* 1 if need to add symbol to value */ + r_baserel : 1, /* linkage table relative */ + r_jmptable : 1, /* relocate to jump table */ + r_relative : 1, /* load address relative */ + r_copy : 1; /* run time copy */ +}; + +/* + * The relocation_info fields are used as follows: + * + * r_address + * + * Contains the byte offset of a pointer that needs to be link-edited. + * Text relocation offsets are reckoned from the start of the text + * segment, and data relocation offsets from the start of the data + * segment. The link editor adds the value that is already stored at this + * offset into the new value that it computes using this relocation + * record. + * + * r_symbolnum + * + * Contains the ordinal number of a symbol structure in the symbol table + * (it is not a byte offset). After the link editor resolves the absolute + * address for this symbol, it adds that address to the pointer that is + * under going relocation. (If the r_extern bit is clear, the situation is + * different; see below.) + * + * r_pcrel + * + * If this is set, the link editor assumes that it is updating a pointer + * that is part of a machine code instruction using pc-relative + * addressing. The address of the relocated pointer is implicitly added to + * its value when the running program uses it. 
+ * + * r_length + * + * Contains the log base 2 of the length of the pointer in bytes; 0 for + * 1-byte displacements, 1 for 2-byte displacements, 2 for 4-byte + * displacements. + * + * r_extern + * + * Set if this relocation requires an external reference; the link editor + * must use a symbol address to update the pointer. When the r_extern bit + * is clear, the relocation is `local'; the link editor updates the + * pointer to reflect changes in the load addresses of the various + * segments, rather than changes in the value of a symbol (except when + * r_baserel is also set (see below)). In this case, the content of the + * r_symbolnum field is an n_type value (see below); this type field tells + * the link editor what segment the relocated pointer points into. + * + * r_baserel + * + * If set, the symbol, as identified by the r_symbolnum field, is to be + * relocated to an offset into the Global Offset Table. At runtime, the + * entry in the Global Offset Table at this offset is set to be the + * address of the symbol. + * + * r_jmptable + * + * If set, the symbol, as identified by the r_symbolnum field, is to be + * relocated to an offset into the Procedure Linkage Table. + * + * r_relative + * + * If set, this relocation is relative to the (run-time) load address of + * the image this object file is going to be a part of. This type of + * relocation only occurs in shared objects. + * + * r_copy + * + * If set, this relocation record identifies a symbol whose contents + * should be copied to the location given in r_address. The copying is + * done by the runtime link-editor from a suitable data item in a shared + * object. + * + * Symbols map names to addresses (or more generally, strings to values). + * Since the link-editor adjusts addresses, a symbol's name must be used to + * stand for its address until an absolute value has been assigned. Symbols + * consist of a fixed-length record in the symbol table and a variable-length + * name in the string table. 
The symbol table is an array of nlist structures: + */ + +struct nlist { + union { + char * n_name; + uint32_t n_strx; + } n_un; + uint32_t n_type; + uint32_t n_other; + uint32_t n_desc; + uint32_t n_value; +}; + +/* + * The fields are used as follows: + * + * n_un.n_strx + * + * Contains a byte offset into the string table for the name of this + * symbol. + * + * n_un.n_name + * + * Used by the runtime link editor. Contains a pointer to the string in + * memory. + * + * n_type + * + * Used by the link editor to determine how to update the symbol's value. + * The n_type field is broken down into three sub-fields using bitmasks. + * The link editor treats symbols with the N_EXT type bit set as + * `external' symbols and permits references to them from other binary + * files. The N_TYPE mask selects bits of interest to the link editor: + * + * N_UNDF + * + * An undefined symbol. The link editor must locate an external + * symbol with the same name in another binary file to determine + * the absolute value of this symbol. As a special case, if the + * n_value field is nonzero and no binary file in the link-edit + * defines this symbol, the link-editor will resolve this symbol + * to an address in the bss segment, reserving an amount of bytes + * equal to n_value. If this symbol is undefined in more than one + * binary file and the binary files do not agree on the size, the + * link editor chooses the greatest size found across all + * binaries. + * + * N_ABS + * + * An absolute symbol. The link editor does not update an absolute + * symbol. + * + * N_TEXT + * + * A text symbol. This symbol's value is a text address and the + * link editor will update it when it merges binary files. + * + * N_DATA + * + * A data symbol; similar to N_TEXT but for data addresses. 
The + * values for text and data symbols are not file offsets but + * addresses; to recover the file offsets, it is necessary to + * identify the loaded address of the beginning of the + * corresponding section and subtract it, then add the offset of + * the section. + * + * N_BSS + * + * A bss symbol; like text or data symbols but has no + * corresponding offset in the binary file. + * + * The N_STAB mask selects bits of interest to symbolic debuggers. + * + * n_other + * + * This field provides information on the nature of the symbol independent + * of the symbol's location in terms of segments as determined by the + * n_type field. Currently, the lower 4 bits of the n_other field hold one + * of two values: AUX_FUNC and AUX_OBJECT. AUX_FUNC associates the symbol + * with a callable function, while AUX_OBJECT associates the symbol with + * data, irrespective of their locations in either the text or the data + * segment. + * + * n_desc + * + * Reserved for use by debuggers; passed untouched by the link editor. + * Different debuggers use this field for different purposes. + * + * n_value + * + * Contains the value of the symbol. For text, data and bss symbols, this + * is an address; for other symbols (such as debugger symbols), the value + * may be arbitrary. + * + * The string table consists of a 32-bit length followed by null-terminated + * symbol strings. The length represents the size of the entire table in bytes, + * so its minimum value (or the offset of the first string) is always 4 on + * 32-bit machines. + */ + +/* Used in nlist.n_type. 
*/ +#define N_UNDF 0x00 /* undefined */ +#define N_ABS 0x02 /* absolute address */ +#define N_TEXT 0x04 /* text segment */ +#define N_DATA 0x08 /* data segment */ +#define N_BSS 0x10 /* bss segment */ + +#define N_EXT 0x01 /* external (global) bit, OR'ed in */ +#define N_TYPE 0xff /* mask for all the type bits */ +#define N_STAB 0xffffff00 /* mask for debugger symbols -- stab(5) */ + +/* Used in nlist.n_other */ +#define AUX_FUNC 1 /* Function */ +#define AUX_OBJECT 2 /* Data */ + +#endif diff --git a/docs/compat_matrix.md b/docs/compat_matrix.md index 7e4bd2b..75bfe4c 100644 --- a/docs/compat_matrix.md +++ b/docs/compat_matrix.md @@ -10,3 +10,4 @@ Compatibility Matrix | nedsim | nedasm | neddis | Arch. Man. | Inst. Ref. | | ------ | ------ | ------ | ---------- | ---------- | | 1 | 1 | 1 | 1 | 1 | +| 2 | 2 | 2 | . | . | diff --git a/misc/licenses/FreeBSD b/misc/licenses/FreeBSD new file mode 100644 index 0000000..2d706b9 --- /dev/null +++ b/misc/licenses/FreeBSD @@ -0,0 +1,33 @@ +This license applies to sections of the following files: + +ned/common/a.out.h + +================================================================================ + +Copyright (c) 1992, 1993 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. diff --git a/nedasm/nedasm.c b/nedasm/nedasm.c index 4b4d3bc..c82233d 100644 --- a/nedasm/nedasm.c +++ b/nedasm/nedasm.c @@ -14,7 +14,7 @@ #include "nedasm_parser.h" #include "nedasm_codegen.h" -#define VERSION 1 +#define VERSION 2 void print_usage(char ** argv) @@ -195,10 +195,6 @@ main(int argc, char ** argv) */ parse_assembly(&instructions, input); fclose(input); -// if (instructions == NULL) { -// fprintf(stderr, "ERROR: Failed to parse any input.\n"); -// exit(EXIT_FAILURE); -// } /* * Make passes over the intermediate representation to enforce architecture @@ -208,7 +204,6 @@ main(int argc, char ** argv) enforce_word_boundary(instructions); assign_addresses(instructions); resolve_labels(instructions); - prune_label_structs(instructions); generate_code(instructions, output); diff --git a/nedasm/nedasm_codegen.c b/nedasm/nedasm_codegen.c index 9484a1d..ddfd792 100644 --- a/nedasm/nedasm_codegen.c +++ b/nedasm/nedasm_codegen.c @@ -6,30 +6,140 @@ #include #include #include +#include #include "nedasm_structures.h" +#include "nedasm_misc.h" +#include "../common/a.out.h" void -write_word_to_file(uint32_t * word, FILE * file) +write_aout_exec_header(FILE * output, struct exec * aout_exec) { - fwrite(word, 4, 1, file); + uint32_t 
write_count = 0; + for (uint32_t i=0; i<8; i++) { + switch (i) { + case 0: write_count = fwrite(&(aout_exec->a_midmag), 4, 1, output); break; + case 1: write_count = fwrite(&(aout_exec->a_text), 4, 1, output); break; + case 2: write_count = fwrite(&(aout_exec->a_data), 4, 1, output); break; + case 3: write_count = fwrite(&(aout_exec->a_bss), 4, 1, output); break; + case 4: write_count = fwrite(&(aout_exec->a_syms), 4, 1, output); break; + case 5: write_count = fwrite(&(aout_exec->a_entry), 4, 1, output); break; + case 6: write_count = fwrite(&(aout_exec->a_trsize), 4, 1, output); break; + case 7: write_count = fwrite(&(aout_exec->a_drsize), 4, 1, output); break; + } + if (write_count != 1) { + fprintf(stderr, "ERROR: Failed to write aout header.\n"); + exit(EXIT_FAILURE); + } + } +} + +void +write_aout_text_segment(FILE * output, uint32_t * text_segment) +{ + for (uint32_t i=1; i <= text_segment[0]; i++) { + uint32_t write_count = fwrite(&(text_segment[i]), 4, 1, output); + if (write_count != 1) { + fprintf(stderr, "ERROR: Failed to write text segment.\n"); + exit(EXIT_FAILURE); + } + } +} + +void +write_aout_symbol_table(FILE * output, struct nlist * symbol_table, uint32_t symbol_count) +{ + uint32_t write_count = 0; + for (uint32_t i=0; i < symbol_count; i++) { + for (uint32_t j=0; j<5; j++) { + switch (j) { + case 0: write_count = fwrite(&(symbol_table[i].n_un.n_strx), 4, 1, output); break; + case 1: write_count = fwrite(&(symbol_table[i].n_type), 4, 1, output); break; + case 2: write_count = fwrite(&(symbol_table[i].n_other), 4, 1, output); break; + case 3: write_count = fwrite(&(symbol_table[i].n_desc), 4, 1, output); break; + case 4: write_count = fwrite(&(symbol_table[i].n_value), 4, 1, output); break; + } + if (write_count != 1) { + fprintf(stderr, "ERROR: Failed to write symbol table.\n"); + exit(EXIT_FAILURE); + } + } + } +} + +void +write_aout_string_table(FILE * output, char * string_table, uint32_t string_table_size) +{ + uint32_t write_count = 
fwrite(&string_table_size, 4, 1, output); + if (write_count != 1) { + fprintf(stderr, "ERROR: Failed to write string table size.\n"); + exit(EXIT_FAILURE); + } + for (uint32_t i=0; i < string_table_size; i++) { + write_count = fwrite(&(string_table[i]), 1, 1, output); + if (write_count != 1) { + fprintf(stderr, "ERROR: Failed to write string table.\n"); + exit(EXIT_FAILURE); + } + } } void -pad_word_boundary(uint8_t * syllable_count, uint32_t * word, FILE * output) +generate_aout(FILE * output, uint32_t * text_segment, + struct nlist * symbol_table, uint32_t symbol_count) +{ + struct exec aout_exec; + N_SETMAGIC(aout_exec, NED_MAGIC1, MID_NED, 0); + aout_exec.a_text = (4 * text_segment[0]); /* 4 bytes per word. */ + aout_exec.a_data = 0; + aout_exec.a_bss = 0; + aout_exec.a_syms = (20 * symbol_count); /* 20 = 5x 32-bit values from nlist struct. */ + aout_exec.a_entry = MEM_BEGIN; + aout_exec.a_trsize = 0; + aout_exec.a_drsize = 0; + write_aout_exec_header(output, &aout_exec); + + write_aout_text_segment(output,text_segment); + + uint32_t string_table_size = 0; + for (uint32_t i = 0; i < symbol_count; i++) { + string_table_size += (strnlen(symbol_table[i].n_un.n_name, MAX_LABEL_LEN) + 1); + } + char * string_table = malloc(string_table_size); + uint32_t string_table_offset = 0; + for (uint32_t i = 0; i < symbol_count; i++) { + uint32_t len = (strnlen(symbol_table[i].n_un.n_name, MAX_LABEL_LEN) + 1); + strncpy(string_table+string_table_offset, symbol_table[i].n_un.n_name, len); + symbol_table[i].n_un.n_strx = string_table_offset; + string_table_offset += len; + } + + write_aout_symbol_table(output, symbol_table, symbol_count); + + write_aout_string_table(output, string_table, string_table_size); +} + +void +write_word_to_text_segment(uint32_t * word, uint32_t * text_seg) +{ + text_seg[(++text_seg[0])] = *word; +} + +void +pad_word_boundary(uint8_t * syllable_count, uint32_t * word, uint32_t * text_seg) { if (*syllable_count > 0) { while (*syllable_count <= 4) { *word 
|= 0b000001 << 6 * (4 - (*syllable_count)++); } - write_word_to_file(word, output); + write_word_to_text_segment(word, text_seg); *syllable_count = 0; *word = 0; } } void -generate_code_WORD(struct instruction * instructions, FILE * output) +generate_code_WORD(struct instruction * instructions, uint32_t * text_seg) { /* Set the instruction format to Type A. */ uint32_t temp_word = 0b10000000000000000000000000000000; @@ -49,12 +159,11 @@ generate_code_WORD(struct instruction * instructions, FILE * output) /* Set the data portion of the instruction. */ temp_word |= instructions->data >> 1; - /* Write to disk. */ - write_word_to_file(&temp_word, output); + write_word_to_text_segment(&temp_word, text_seg); } void -generate_code_IM(struct instruction * instructions, FILE * output, +generate_code_IM(struct instruction * instructions, uint8_t * syllable_count, uint32_t * temp_word) { uint8_t temp_syllable = 0b00100000; @@ -68,7 +177,7 @@ generate_code_IM(struct instruction * instructions, FILE * output, } void -generate_code_LDSP(struct instruction * instructions, FILE * output, +generate_code_LDSP(struct instruction * instructions, uint8_t * syllable_count, uint32_t * temp_word) { uint8_t temp_syllable = 0b00011000; @@ -82,7 +191,7 @@ generate_code_LDSP(struct instruction * instructions, FILE * output, } void -generate_code_STSP(struct instruction * instructions, FILE * output, +generate_code_STSP(struct instruction * instructions, uint8_t * syllable_count, uint32_t * temp_word) { uint8_t temp_syllable = 0b00010000; @@ -95,12 +204,35 @@ generate_code_STSP(struct instruction * instructions, FILE * output, *temp_word |= temp_syllable << 6 * (4 - *syllable_count); } +void +generate_label(struct instruction * instructions, struct nlist * symbol_table, uint32_t index) +{ + symbol_table[index].n_un.n_name = instructions->label; + symbol_table[index].n_type = N_ABS; + symbol_table[index].n_other = AUX_FUNC; + symbol_table[index].n_desc = 0; + while (instructions->syllable == 
LABEL) instructions = instructions->next; + symbol_table[index].n_value = instructions->address; +} + void generate_code(struct instruction * instructions, FILE * output) { + uint32_t label_count = 0; + uint32_t max_word_count = 0; + struct instruction * temp = seek_instruction_list_start(instructions); + while (temp != NULL) { + (temp->syllable == LABEL) ? (label_count++) : (max_word_count++); + temp = temp->next; + } + /* +1 to store the number of entries as the first element of the array. */ + uint32_t * text_segment = malloc((max_word_count * sizeof(uint32_t)) + 1); + struct nlist * symbol_table = malloc(label_count * sizeof(struct nlist)); + uint8_t syllable_count = 0; uint32_t temp_word = 0; + label_count = 0; instructions = seek_instruction_list_start(instructions); while (instructions != NULL) { /* If starting a new word, zero the word, setting it to Type C by default. */ @@ -109,17 +241,17 @@ generate_code(struct instruction * instructions, FILE * output) switch (instructions->syllable) { case WORD: /* Must pad partial word w/NOPs & write to disk before starting new WORD. 
*/ - pad_word_boundary(&syllable_count, &temp_word, output); - generate_code_WORD(instructions, output); + pad_word_boundary(&syllable_count, &temp_word, text_segment); + generate_code_WORD(instructions, text_segment); break; case IM: - generate_code_IM(instructions, output, &syllable_count, &temp_word); + generate_code_IM(instructions, &syllable_count, &temp_word); break; case LDSP: - generate_code_LDSP(instructions, output, &syllable_count, &temp_word); + generate_code_LDSP(instructions, &syllable_count, &temp_word); break; case STSP: - generate_code_STSP(instructions, output, &syllable_count, &temp_word); + generate_code_STSP(instructions, &syllable_count, &temp_word); break; case MVSTCK: temp_word |= 0b001111 << 6 * (4 - syllable_count); break; case ADD: temp_word |= 0b001100 << 6 * (4 - syllable_count); break; @@ -137,18 +269,27 @@ generate_code(struct instruction * instructions, FILE * output) case HALT: temp_word |= 0b000000 << 6 * (4 - syllable_count); break; case SWAP: temp_word |= 0b001101 << 6 * (4 - syllable_count); break; case JMP: temp_word |= 0b001110 << 6 * (4 - syllable_count); break; + case LABEL: + generate_label(instructions, symbol_table, label_count); + label_count++; + break; default: fprintf(stderr, "ERROR: Unassigned syllable on line %u.\n", instructions->linenum); break; } - if (syllable_count == 4) write_word_to_file(&temp_word, output); + if (syllable_count == 4) write_word_to_text_segment(&temp_word, text_segment); - if (instructions->syllable != WORD) syllable_count = (syllable_count + 1) % 5; + if (instructions->syllable != WORD && instructions->syllable != LABEL) { + syllable_count = (syllable_count + 1) % 5; + } instructions = instructions->next; } /* If necessary, pad incomplete word with NOPs. 
*/ - pad_word_boundary(&syllable_count, &temp_word, output); + pad_word_boundary(&syllable_count, &temp_word, text_segment); + + /* Write to disk */ + generate_aout(output, text_segment, symbol_table, label_count); } diff --git a/neddis/neddis.c b/neddis/neddis.c index 96256d0..306666e 100644 --- a/neddis/neddis.c +++ b/neddis/neddis.c @@ -8,8 +8,11 @@ #include #include #include +#include -#define VERSION 1 +#include "../common/a.out.h" + +#define VERSION 2 enum syllables { MVSTCK = 0b00001111, @@ -60,6 +63,8 @@ print_usage(char ** argv) "Usage: %s -i \n" " -h Help (prints this message)\n" " -i Specify a binary image file to disassemble.\n" + " -s Print summary of information in a.out exec header.\n" + " -l Print labels and organize disassembled code by label.\n" , VERSION, argv[0] ); } @@ -160,6 +165,110 @@ print_formatC_mnemonics(uint32_t word) } } +void +parse_aout_file(FILE * input, struct exec * aout_exec, uint32_t ** text_segment, + struct nlist ** symbol_table, uint32_t * symbol_count) +{ + uint32_t read_count = 0; + + /* Read in and check the a.out header. 
*/ + for (uint32_t i=0; i<8; i++) { + switch (i) { + case 0: read_count = fread(&(aout_exec->a_midmag), 4, 1, input); break; + case 1: read_count = fread(&(aout_exec->a_text), 4, 1, input); break; + case 2: read_count = fread(&(aout_exec->a_data), 4, 1, input); break; + case 3: read_count = fread(&(aout_exec->a_bss), 4, 1, input); break; + case 4: read_count = fread(&(aout_exec->a_syms), 4, 1, input); break; + case 5: read_count = fread(&(aout_exec->a_entry), 4, 1, input); break; + case 6: read_count = fread(&(aout_exec->a_trsize), 4, 1, input); break; + case 7: read_count = fread(&(aout_exec->a_drsize), 4, 1, input); break; + } + if (read_count != 1) { + fprintf(stderr, "ERROR: Invalid a.out header.\n"); + exit(EXIT_FAILURE); + } + } + if (N_BADMAG(*aout_exec)) { + fprintf(stderr, "ERROR: Invalid magic number in a.out header.\n"); + exit(EXIT_FAILURE); + } else if (N_GETMID(*aout_exec) != MID_NED) { + fprintf(stderr, "ERROR: Executable not intended for NED Machine ID.\n"); + exit(EXIT_FAILURE); + } + + /* Read in the text segment. */ + uint32_t text_segment_size = (N_DATOFF(*aout_exec) - N_TXTOFF(*aout_exec)); + *text_segment = malloc(text_segment_size + 4); + (*text_segment)[0] = text_segment_size / 4; + read_count = fread(&((*text_segment)[1]), 1, text_segment_size, input); + if (read_count != text_segment_size) { + fprintf(stderr, "ERROR: Failed to read entire text segment.\n"); + exit(EXIT_FAILURE); + } + + /* Read in the symbol table. */ + *symbol_count = ((N_STROFF(*aout_exec) - N_SYMOFF(*aout_exec)) / 20); /* 20 bytes per symbol. 
*/ + *symbol_table = malloc((*symbol_count) * sizeof(struct nlist)); + for (uint32_t i=0; i < *symbol_count; i++) { + for (uint32_t j=0; j<5; j++) { + switch (j) { + case 0: read_count = fread(&((*symbol_table)[i].n_un.n_strx), 4, 1, input); break; + case 1: read_count = fread(&((*symbol_table)[i].n_type), 4, 1, input); break; + case 2: read_count = fread(&((*symbol_table)[i].n_other), 4, 1, input); break; + case 3: read_count = fread(&((*symbol_table)[i].n_desc), 4, 1, input); break; + case 4: read_count = fread(&((*symbol_table)[i].n_value), 4, 1, input); break; + } + if (read_count != 1) { + fprintf(stderr, "ERROR: Unable to read entire symbol table.\n"); + exit(EXIT_FAILURE); + } + } + } + + /* Read in the string table and update the symbol table entries with pointers to new strings. */ + uint32_t string_table_size; + read_count = fread(&string_table_size, 4, 1, input); + if (read_count != 1) { + fprintf(stderr, "ERROR: Failed to read string table size.\n"); + exit(EXIT_FAILURE); + } + for (uint32_t i=0; i < *symbol_count; i++) { + uint32_t len = 0; + if (i < ((*symbol_count)-1)) { + len = ((*symbol_table)[i+1].n_un.n_strx - (*symbol_table)[i].n_un.n_strx); + } else { + len = (string_table_size - (*symbol_table)[i].n_un.n_strx); + } + (*symbol_table)[i].n_un.n_name = malloc(len); + read_count = fread((*symbol_table)[i].n_un.n_name, 1, len, input); + if (read_count != len) { + fprintf(stderr, "ERROR: Failed to read a string from the string table.\n"); + exit(EXIT_FAILURE); + } + } +} + +void +print_aout_summary(struct exec * aout_exec) +{ + printf("\n Summary\n" + " =====================================\n" + " Magic: 0x%08x\n" + " Machine ID: 0x%08x\n" + " Flags: 0x%08x\n" + " Text Size: 0x%08x bytes\n" + " Data Size: 0x%08x bytes\n" + " BSS Size: 0x%08x bytes\n" + " Symbol Table Size: 0x%08x bytes\n" + " Entry Point: 0x%08x\n" + " Text Reloc. Size: 0x%08x bytes\n" + " Data Reloc. 
Size: 0x%08x bytes\n\n\n", + N_GETMAGIC(*aout_exec), N_GETMID(*aout_exec), N_GETFLAG(*aout_exec), + aout_exec->a_text, aout_exec->a_data, aout_exec->a_bss, aout_exec->a_syms, + aout_exec->a_entry, aout_exec->a_trsize, aout_exec->a_drsize + ); +} + int main(int argc, char ** argv) { @@ -168,8 +277,16 @@ main(int argc, char ** argv) */ int c; FILE * input = NULL; - while ((c = getopt(argc,argv,"i:h")) != -1) { + bool display_summary = false; + bool display_labels = false; + while ((c = getopt(argc,argv,"i:hsl")) != -1) { switch (c) { + case 'l': + display_labels = true; + break; + case 's': + display_summary = true; + break; case 'i': if ((input = fopen(optarg, "r")) == NULL) { fprintf(stderr, "ERROR: %s: %s\n", optarg, strerror(errno)); @@ -189,14 +306,33 @@ main(int argc, char ** argv) exit(EXIT_FAILURE); } + struct exec aout_exec; + uint32_t * text_segment; + struct nlist * symbol_table; + uint32_t symbol_count; + parse_aout_file(input, &aout_exec, &text_segment, &symbol_table, &symbol_count); + + if (display_summary) print_aout_summary(&aout_exec); + /* * Main Loop */ + print_header(); uint32_t word; - uint32_t offset = 0; + uint32_t offset = aout_exec.a_entry; /* Since all NED instructions are one word (4 bytes) wide, read in one word increments. */ - while (fread(&word, 4, 1, input)) { + uint32_t i = 1; + while (i < text_segment[0]) { + if (display_labels) { + for (uint32_t i=0; i < symbol_count; i++) { + if (offset == symbol_table[i].n_value) { + printf("\n%s:\n", symbol_table[i].n_un.n_name); + } + } + } + word = text_segment[i]; + i++; printf("0x%08x", offset); printf(" "); printf("0x%08x", word); diff --git a/nedsim/nedsim.c b/nedsim/nedsim.c index 9bdaa59..9d5a045 100644 --- a/nedsim/nedsim.c +++ b/nedsim/nedsim.c @@ -16,7 +16,9 @@ #include #include -#define VERSION 1 +#include "../common/a.out.h" + +#define VERSION 2 /* Bytes per word. 
*/ #define BPW 4 @@ -541,6 +543,53 @@ wait_for_next_clock_cycle(struct NEDstate * state) } } +void +parse_aout_file(FILE * input, struct exec * aout_exec, uint8_t * text_segment) +{ + uint32_t read_count = 0; + + /* Read in and check the a.out header. */ + for (uint32_t i=0; i<8; i++) { + switch (i) { + case 0: read_count = fread(&(aout_exec->a_midmag), 4, 1, input); break; + case 1: read_count = fread(&(aout_exec->a_text), 4, 1, input); break; + case 2: read_count = fread(&(aout_exec->a_data), 4, 1, input); break; + case 3: read_count = fread(&(aout_exec->a_bss), 4, 1, input); break; + case 4: read_count = fread(&(aout_exec->a_syms), 4, 1, input); break; + case 5: read_count = fread(&(aout_exec->a_entry), 4, 1, input); break; + case 6: read_count = fread(&(aout_exec->a_trsize), 4, 1, input); break; + case 7: read_count = fread(&(aout_exec->a_drsize), 4, 1, input); break; + } + if (read_count != 1) { + fprintf(stderr, "ERROR: Invalid a.out header.\n"); + exit(EXIT_FAILURE); + } + } + if (N_BADMAG(*aout_exec)) { + fprintf(stderr, "ERROR: Invalid magic number in a.out header.\n"); + exit(EXIT_FAILURE); + } else if (N_GETMID(*aout_exec) != MID_NED) { + fprintf(stderr, "ERROR: Executable not intended for NED Machine ID.\n"); + exit(EXIT_FAILURE); + } + + /* Read in the text segment. */ + uint32_t text_segment_size = (N_DATOFF(*aout_exec) - N_TXTOFF(*aout_exec)); + read_count = fread(text_segment, 1, text_segment_size, input); + if (read_count != text_segment_size) { + fprintf(stderr, "ERROR: Failed to read entire text segment.\n"); + exit(EXIT_FAILURE); + } + + /* Correct the byte order. 
*/ + for (uint32_t i=0; i < (text_segment_size / 4); i++) { + uint8_t temp_word[4]; + for (uint8_t j=0; j<4; j++) temp_word[j] = text_segment[((i*4)+j)]; + for (uint8_t j=0; j<4; j++) text_segment[((i*4)+j)] = temp_word[(3-j)]; + } +} + + int main(int argc, char ** argv) { @@ -555,6 +604,7 @@ main(int argc, char ** argv) case 'i': if ((input = fopen(optarg, "r")) == NULL) { fprintf(stderr, "ERROR: %s: %s\n", optarg, strerror(errno)); + exit(EXIT_FAILURE); } break; case 'p': @@ -563,7 +613,7 @@ main(int argc, char ** argv) if (1 <= temp_p && temp_p <= 1000000000) { clock_period = temp_p; } else { - fprintf(stderr, "ERROR: Clock period out of range.\n"); + fprintf(stderr, "WARN: Clock period out of range.\n"); } break; } @@ -603,12 +653,9 @@ main(int argc, char ** argv) signal(SIGINT, ned_sigint_handler); /* Load an initial image into memory. */ - uint32_t temp_word; uint32_t address = 0x20000000; - while(fread(&temp_word, 4, 1, input)) { - ram_w_word(state, address, temp_word); - address += 4; - } + struct exec aout_exec; + parse_aout_file(input, &aout_exec, &(state->ram[address])); fclose(input); /* -- 2.20.1