#include <unistd.h>
#include <stdio.h>
#include <string.h>
+#include <strings.h>
#include <errno.h>
#include <sysexits.h>
#include <assert.h>
-/* #include dcpu16.h */
-typedef unsigned short DCPU16_WORD;
+#include "dcpu16.h"
+#include "common.h"
-/* quick and dirty assembler for dcpu16 */
+/*
+ * quick and dirty assembler for dcpu16
+ *
+ * Justin Wind <justin.wind@gmail.com>
+ * 2012 04 07 - implementation started
+ * 2012 04 10 - functional
+ * 2012 04 16 - support dat statements
+ * 2012 05 05 - v1.7 revision started
+ * 2012 05 08 - v1.7 revision implemented
+ *
+ * TODO
+ * needs ability to specify location for code or data
+ * needs ability to specify label as relative to another label
+ * short labels not correctly computed
+ * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly
+ */
static const char * const src_id_ = "$Id$";
const char const out_filename_default_[] = "a.out";
+/* global invocation options; main() increments these once per -v / -d flag */
+struct options {
+ unsigned int verbose; /* DEBUG_* output needs a value above 2 */
+ unsigned int dryrun; /* non-zero: output_ does not open or write the output file */
+} opt_ = {
+ .verbose = 0,
+ .dryrun = 0,
+};
+
+#define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0) /* prefixed debug line */
+#define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0) /* same threshold, no prefix */
+#define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0) /* any non-zero verbose level */
+
static
void usage_(char *prog, unsigned int full) {
FILE *f = full ? stdout : stderr;
fprintf(f, "%s -- \n\n",
prog);
- fprintf(f, "Usage: %s\n",
+ fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
prog);
if (full) {
fprintf(f, "\nOptions:\n"
"\t-h -- this screen\n"
- "\t-o <file> -- output to <file> [default: %s]\n",
+ "\t-o <file> -- output to <file> [default: %s]\n"
+ "\t-s -- allow short labels in instruction words\n"
+ "\t-d -- dry run, print results, do not write to file\n"
+ "\t-v -- verbose output\n",
out_filename_default_);
fprintf(f, "\n%78s\n",
}
}
+/* instruction word layout, bit 0 rightmost (LSB-0): aaaaaabbbbbooooo */
+#define OPCODE_BITS 5 /* o: basic opcode */
+#define OPERAND_B_BITS 5 /* b: first operand, or the opcode of a non-basic instruction */
+#define OPERAND_A_BITS 6 /* a: second operand */
+#define N_BIT_MASK(__x__) ((1 << (__x__)) - 1) /* value with the low __x__ bits set */
+
+
+/* instructions have operands */
struct operand_ {
struct operand_ *next;
- char *operand;
+ char *operand; /* tokenized operand text */
};
+/* keep an array of instructions as we read them in */
struct instruction_ {
- struct instruction_ *next;
- char *label;
- char *opcode;
- struct operand_ *operands;
-
- unsigned int length; /* words */
+ size_t src_line; /* input line number this came from; set by parse_stream_ */
+ char *label; /* set if a label points here */
+ char *opcode; /* tokenized instruction text */
+ struct operand_ *operands; /* list of operands */
+ unsigned int ready : 1; /* bytecode computed? */
+ unsigned int length; /* number of words of bytecode */
DCPU16_WORD instr_words[];
};
-/* buf must be 0-terminated */
+/* keep an array of labels, indexed back to their instruction locations */
+struct label_ {
+ char *label; /* name of label; parse_stream_ stores the instruction's own label pointer here */
+ struct instruction_ **instr; /* pointer into array of instructions */
+ unsigned int ready : 1; /* do we know where this label is yet? */
+ DCPU16_WORD addr; /* address in words, written by label_addr_calculate_; meaningful only when ready */
+};
+
+
+/* locate and return the label entry matching name */
static
-int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
- const char const *sep = " \t\n";
- struct instruction_ *instr = NULL;
- char *label = NULL,
- *opcode = NULL,
- *operand = NULL;
+struct label_ *label_find_(struct dynamic_array *labels, char *name) {
+    struct label_ *found = NULL;
+    size_t idx = 0;
+
+    /* linear scan of the label table; the first exact (case-sensitive) match wins */
+    while (found == NULL && idx < labels->entries) {
+        struct label_ *candidate = (struct label_ *)DYNARRAY_ITEM(*labels, idx);
+
+        if (strcmp(candidate->label, name) == 0)
+            found = candidate;
+        idx++;
+    }
+
+    /* NULL when no entry carries that name */
+    return found;
+}
+
+
+/* label_addr_
+ * Looks name up in labels and, if that label's address has been calculated,
+ * stores it in *addr.
+ * returns 0 on success
+ * returns -1 if there is no label of that name
+ * returns -2 if the label exists but is not marked ready (*addr untouched)
+ */
+static
+int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
+    struct label_ *entry = label_find_(labels, name);
+
+    if (entry == NULL)
+        return -1;
+
+    if (entry->ready) {
+        *addr = entry->addr;
+        return 0;
+    }
+
+    return -2;
+}
+
+
+/* label_addr_calculate_
+ * Attempts to determine the addresses of all labels.
+ *
+ * Every label is recomputed on each call, whether or not it was already
+ * marked ready.  A label's address is the sum of the lengths of all
+ * instructions preceding the one it points at; the walk stops early when it
+ * reaches an instruction carrying another label, whose address is added.
+ *
+ * A label at the very first instruction gets address 0.  A label that depends
+ * on an earlier label with no address yet is reported on stderr and left
+ * unmodified; the remaining labels are still processed.
+ */
+static
+void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
+    size_t i;
+
+    /* idea: label1:label2 - calculated as offset between labels */
+
+    /* for each label.. */
+    for (i = 0; i < labels->entries; i++) {
+        struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
+        struct instruction_ **first = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
+        struct instruction_ **instr = l->instr;
+        unsigned int word_count = 0;
+        unsigned int resolved = 1;
+
+        DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label);
+
+        /*
+         * starting just before the instruction for this label,
+         * walk backwards through the list of instructions
+         * until we get to the start or a known prior label address.
+         * Testing before decrementing keeps the pointer inside the array.
+         */
+        while (instr > first) {
+            instr--;
+
+            if ((*instr)->ready == 0)
+                DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode ? (*instr)->opcode : "");
+            word_count += (*instr)->length;
+
+            DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode ? (*instr)->opcode : "", (*instr)->length);
+
+            /* have we come across an instruction which a label points to?
+               it should already be calculated, so just add that on and be done */
+            if ((*instr)->label
+            &&  strcmp((*instr)->label, l->label)) {
+                DCPU16_WORD addr;
+
+                if (label_addr_(labels, (*instr)->label, &addr)) {
+                    fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
+                            (*instr)->label,
+                            l->label);
+                    resolved = 0;
+                } else {
+                    word_count += addr;
+                }
+                break;
+            }
+        }
+
+        /* cannot place this label yet; move on to the next one */
+        if (!resolved)
+            continue;
+
+        l->addr = word_count;
+        l->ready = 1;
+        DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
+    }
+}
+
+
+/* opcode_bits_
+ * Maps an instruction mnemonic (matched case-insensitively) to the value of
+ * its basic-opcode field.  Non-basic instructions (JSR, INT, ...) map to 0.
+ * returns -1, after reporting on stderr, if the mnemonic is not in the table
+ */
+static
+int opcode_bits_(char *opcode) {
+    static struct {
+        char op[4];
+        char value;
+    } basic_opcodes[] = {
+        { "JSR", 0x00 },
+        { "INT", 0x00 },
+        { "IAG", 0x00 },
+        { "IAS", 0x00 },
+        { "RFI", 0x00 },
+        { "IAQ", 0x00 },
+        { "HWN", 0x00 },
+        { "HWQ", 0x00 },
+        { "HWI", 0x00 },
+        { "SET", 0x01 },
+        { "ADD", 0x02 },
+        { "SUB", 0x03 },
+        { "MUL", 0x04 },
+        { "MLI", 0x05 },
+        { "DIV", 0x06 },
+        { "DVI", 0x07 },
+        { "MOD", 0x08 },
+        { "MDI", 0x09 },
+        { "AND", 0x0a },
+        { "BOR", 0x0b },
+        { "XOR", 0x0c },
+        { "SHR", 0x0d },
+        { "ASR", 0x0e },
+        { "SHL", 0x0f },
+        { "IFB", 0x10 },
+        { "IFC", 0x11 },
+        { "IFE", 0x12 },
+        { "IFN", 0x13 },
+        { "IFG", 0x14 },
+        { "IFA", 0x15 },
+        { "IFL", 0x16 },
+        { "IFU", 0x17 },
+        { "ADX", 0x1a },
+        { "SBX", 0x1b },
+        { "STI", 0x1e },
+        { "SDI", 0x1f },
+        { "", 0x00 }
+    };
+    size_t n;
+
+    /* the table ends at the entry with an empty mnemonic */
+    for (n = 0; basic_opcodes[n].op[0] != '\0'; n++) {
+        if (strcasecmp(basic_opcodes[n].op, opcode) == 0)
+            return basic_opcodes[n].value;
+    }
+
+    fprintf(stderr, "unknown instruction '%s'\n", opcode);
+    return -1;
+}
+
+/* nbi_opcode_bits_
+ * Maps a non-basic instruction mnemonic (matched case-insensitively) to the
+ * opcode value that instr_assemble_ places in the 'b operand' bits when the
+ * basic opcode field is 0x00.
+ * returns -1, after reporting the unknown mnemonic on stderr, if not found
+ */
+static
+int nbi_opcode_bits_(char *nbi_opcode) {
+    static struct {
+        char op[4];
+        char value;
+    } nbi_opcodes_bits[] = {
+        { " ", 0x00 }, /* reserved for future */
+        { "JSR", 0x01 },
+        { "INT", 0x08 },
+        { "IAG", 0x09 },
+        { "IAS", 0x0a },
+        { "RFI", 0x0b },
+        { "IAQ", 0x0c },
+        { "HWN", 0x10 },
+        { "HWQ", 0x11 },
+        { "HWI", 0x12 },
+        { "", 0x00 }
+    }, *o;
+
+    for (o = nbi_opcodes_bits; o->op[0]; o++) {
+        if (strcasecmp(o->op, nbi_opcode) == 0)
+            break;
+    }
+
+    if (o->op[0] == '\0') {
+        /* o is the empty sentinel here, so report the name we were asked for */
+        fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
+        return -1;
+    }
+
+    return o->value;
+}
+
+/* register_enumerate_
+ * Converts a general register character like 'x' (either case) to its value
+ * like 0x03, in the order A B C X Y Z I J.
+ * For any other character, including NUL, reports an internal error on
+ * stderr and returns (unsigned int)-1.
+ */
+static inline
+unsigned int register_enumerate_(char r) {
+    const char regs[] = "AaBbCcXxYyZzIiJj";
+    /* strchr would match the terminator for NUL, so rule that out first */
+    const char *x = (r != '\0') ? strchr(regs, r) : NULL;
+
+    if (x)
+        return (x - regs)/2;
+
+    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned int)(unsigned char)r);
+    return -1;
+}
+
+/* buf_strip_chars_
+ * Removes, in place, every character of buf that appears in chars; the
+ * remaining characters keep their order and buf stays zero-terminated.
+ */
+static inline
+void buf_strip_chars_(char *buf, char *chars) {
+    char *rd = buf;
+    char *wr = buf;
+
+    while (*rd != '\0') {
+        /* keep only characters that are not in the strip set */
+        if (strchr(chars, *rd) == NULL) {
+            *wr = *rd;
+            wr++;
+        }
+        rd++;
+    }
+
+    *wr = '\0';
+}
+
+
+/* value_parse_word_
+ * Reads the whole of s as an unsigned integer constant, in any base strtoul
+ * accepts with base 0.
+ * returns 0 and stores the value in *value if all of s is a number
+ * returns -1 if s is empty or is not entirely a number
+ * returns -2 if strtoul reported the value as out of range
+ */
+static
+int value_parse_word_(const char *s, unsigned long *value) {
+    char *ep;
+
+    errno = 0;
+    *value = strtoul(s, &ep, 0);
+    if (errno == ERANGE)
+        return -2;
+    if (errno != 0 || *s == '\0' || *ep != '\0')
+        return -1;
+    return 0;
+}
+
+/* value_bits_
+ * generate the six bits for a given operand string
+ * returns -1 if it could not parse the operand
+ * returns -2 if it could not parse the operand due to an unresolved label
+ * notes: nextword may be overwritten even if it's not used in final instruction
+ *
+ * *nextwordused is incremented whenever the operand occupies an extra word,
+ * including the -2 case, so the caller can still size the instruction.
+ * The working copy of the operand lives in a static buffer that is grown as
+ * needed and reused between calls.
+ */
+static
+int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
+    static const char gp_regs[] = "AaBbCcXxYyZzIiJj";
+    static char *operand = NULL;
+    static size_t operand_sz = 0;
+
+    unsigned long l;
+    char *o, *ep;
+    size_t len;
+    int r;
+
+    /*
+        Our operand working buffer shouldn't ever need to be too big,
+        but DAT might blow that assumption.
+    */
+    if (operand_sz <= strlen(operand_orig)) {
+        void *tmp_ptr;
+        size_t new_sz = strlen(operand_orig);
- char *x,
- *y,
- *st;
+        if (new_sz < 256)
+            new_sz = 256;
+        new_sz += 256;
- assert(buf != NULL);
- assert(next_instr != NULL);
+        DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
+        tmp_ptr = realloc(operand, new_sz);
+        if (tmp_ptr == NULL) {
+            fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
+            return -1;
+        }
+        operand = tmp_ptr;
+        operand_sz = new_sz;
+    }
+
+    /*
+     * o is the cursor we parse with.  The static 'operand' must keep pointing
+     * at the start of the allocation so later calls can realloc and refill it.
+     */
+    o = strcpy(operand, operand_orig);
+
+    DEBUG_PRINTF("%s: operand '%s' ", __func__, o); /* completed later */
+
+    /* this is a very stupid parser */
+
+    /* first, let's trim all whitespace out of string at once to make parsing easier */
+    buf_strip_chars_(o, " \t\n");
+
+    len = strlen(o);
+    if (len == 0) {
+        DEBUG_PRINTFQ("is unparsable\n");
+        fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+        return -1;
+    }
+
+    /* single character might match a register */
+    if (len == 1
+    &&  strchr(gp_regs, *o)) {
+        DEBUG_PRINTFQ("is register %c\n", *o);
+        return register_enumerate_(*o);
+    }
+
+    /* easy matches */
+
+    /* push and pop now share the same operand value */
+    if (strcasecmp(o, "POP") == 0
+    ||  strcasecmp(o, "[SP++]") == 0) {
+        DEBUG_PRINTFQ("is POP\n");
+        return 0x18;
+    }
+    if (strcasecmp(o, "PUSH") == 0
+    ||  strcasecmp(o, "[--SP]") == 0) {
+        DEBUG_PRINTFQ("is PUSH\n");
+        return 0x18;
+    }
+
+    if (strcasecmp(o, "PEEK") == 0
+    ||  strcasecmp(o, "[SP]") == 0) {
+        DEBUG_PRINTFQ("is PEEK\n");
+        return 0x19;
+    }
+
+    /* this could be better, if we had a real token tree */
+    if (strncasecmp(o, "PICK", 4) == 0) {
+        DEBUG_PRINTFQ("is PICK ");
+
+        r = value_parse_word_(o + 4, &l);
+        if (r == -1) {
+            /* PICK needs a numeric offset; do not quietly assume 0 */
+            DEBUG_PRINTFQ("(unparsable)\n");
+            fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+            return -1;
+        }
+        if (r == -2 || l > 0xffff) {
+            DEBUG_PRINTFQ("(out of range)\n");
+            fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+            return -1;
+        }
+        *nextword = l & 0xffff;
+        *nextwordused += 1;
+        DEBUG_PRINTFQ("0x%04x\n", *nextword);
+        return 0x1a;
+    }
+
+    if (strcasecmp(o, "SP") == 0) {
+        DEBUG_PRINTFQ("is register SP\n");
+        return 0x1b;
+    }
+    if (strcasecmp(o, "PC") == 0) {
+        DEBUG_PRINTFQ("is register PC\n");
+        return 0x1c;
+    }
+    if (strcasecmp(o, "EX") == 0) {
+        DEBUG_PRINTFQ("is register EX\n");
+        return 0x1d;
+    }
+
+    /* is the operand [bracketed]? */
+    if (o[0] == '[' && o[len - 1] == ']') {
+        /* eat the brackets */
+        o[len - 1] = '\0';
+        o++;
+
+        if (*o == '\0') {
+            DEBUG_PRINTFQ("is unparsable\n");
+            fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+            return -1;
+        }
+
+        /* is it [register]? */
+        if (strlen(o) == 1
+        &&  strchr(gp_regs, *o)) {
+            DEBUG_PRINTFQ("is dereferenced register %c\n", *o);
+            return 0x08 | register_enumerate_(*o);
+        }
+
+        /* is it [register+something]? */
+        if ( (ep = strchr(o, '+')) ) {
+            char *reg;
+            char *constant;
+            int reg_bits;
+
+            DEBUG_PRINTFQ("is multipart.. ");
+
+            /* eat the plus */
+            *ep = '\0';
+            ep++;
+
+            /* figure out which one is which */
+            if ((strlen(ep) == 1 && strchr(gp_regs, *ep))
+            ||  strcasecmp(ep, "SP") == 0) {
+                reg = ep;
+                constant = o;
+            } else if ((strlen(o) == 1 && strchr(gp_regs, *o))
+            ||         strcasecmp(o, "SP") == 0) {
+                reg = o;
+                constant = ep;
+            } else {
+                DEBUG_PRINTFQ("is unparsable\n");
+                fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+                return -1;
+            }
+
+            /* [SP+n] is PICK n; every other register uses the 0x10 group */
+            if (strlen(reg) == 2)
+                reg_bits = 0x1a;
+            else
+                reg_bits = 0x10 | register_enumerate_(*reg);
+
+            /* check if something is understandable as a value */
+            r = value_parse_word_(constant, &l);
+            if (r == -2 || (r == 0 && l > 0xffff)) {
+                DEBUG_PRINTFQ("is out of range\n");
+                fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+                return -1;
+            }
+            if (r == 0) {
+                /* seems fine */
+                *nextword = l & 0xffff;
+                *nextwordused += 1;
+
+                /* special case [SP+n]/PICK n */
+                if (reg_bits == 0x1a) {
+                    DEBUG_PRINTFQ("is PICK 0x%04x\n", *nextword);
+                    return 0x1a;
+                }
+
+                DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
+                return reg_bits;
+            }
+
+            /* what? still here? assume it's a label, I guess */
+            /* try to populate nextword with the address of the non-register part */
+            *nextwordused += 1;
+            if (label_addr_(labels, constant, nextword)) {
+                DEBUG_PRINTFQ("(deferred label resolution)\n");
+                return -2;
+            }
+            DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
+            return reg_bits;
+        }
+
+        /* it must just be a dereferenced literal then */
+
+        r = value_parse_word_(o, &l);
+        if (r == -2 || (r == 0 && l > 0xffff)) {
+            DEBUG_PRINTFQ("is out of range\n");
+            fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+            return -1;
+        }
+        if (r == 0) {
+            *nextword = l & 0xffff;
+            *nextwordused += 1;
+            DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
+            return 0x1e;
+        }
+
+        /* not a number? try a label */
+        *nextwordused += 1;
+        if (label_addr_(labels, o, nextword)) {
+            DEBUG_PRINTFQ("(deferred label resolution)\n");
+            return -2;
+        }
+        DEBUG_PRINTFQ("is a dereferenced label\n");
+        return 0x1e;
+    }
+
+    /* left with a literal or a label, then */
+
+    r = value_parse_word_(o, &l);
+    if (r == -2 || (r == 0 && l > 0xffff)) {
+        DEBUG_PRINTFQ("is out of range\n");
+        fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+        return -1;
+    }
+    if (r == 0) {
+        DEBUG_PRINTFQ("is literal value (%lu)\n", l);
+        /* values 0..0x1e and 0xffff fit in the operand bits themselves */
+        if (l < 0x1f) {
+            return (int)l + 0x21;
+        }
+        if (l == 0xffff) {
+            return 0x20;
+        }
+
+        *nextword = l & 0xffff;
+        *nextwordused += 1;
+        return 0x1f;
+    }
+
+    /* try to populate nextword with label address */
+    if (label_addr_(labels, o, nextword)) {
+        DEBUG_PRINTFQ("(deferred label resolution)\n");
+        /* assume non-small literal value */
+        *nextwordused += 1;
+        return -2;
+    }
+
+    DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", o, *nextword);
+    if (allow_short_labels
+    &&  (*nextword < 0x1f) ) {
+        DEBUG_PRINTF("small value label win\n");
+        return (0x21 + *nextword) & N_BIT_MASK(OPERAND_A_BITS);
+    }
+    if (allow_short_labels
+    &&  (*nextword == 0xffff) ) {
+        DEBUG_PRINTF("small value label win\n");
+        return 0x20;
+    }
+
+    *nextwordused += 1;
+    return 0x1f;
+}
+
+/* instruction_print_
+ * Prints an instruction's assembly to stdout: optionally a 16-column label
+ * field, then the opcode and its comma-separated operands.  When the
+ * instruction is marked ready, the words are also passed to
+ * dcpu16_mnemonify_buf between square brackets.
+ * Returns the number of characters printed for the opcode and operands only;
+ * the label field and the bracketed part are not counted.
+ */
+static inline
+int instruction_print_(struct instruction_ *i, unsigned int with_label) {
+    struct operand_ *operand;
+    DCPU16_WORD decoded;
+    int width;
+
+    if (with_label)
+        printf("%-16s ", i->label ? i->label : "");
+
+    width = printf("%3s", i->opcode ? i->opcode : "");
+
+    operand = i->operands;
+    while (operand != NULL) {
+        width += printf(" %s%s", operand->operand, operand->next ? "," : "");
+        operand = operand->next;
+    }
+
+    if (!i->ready)
+        return width;
+
+    printf(" [");
+    decoded = dcpu16_mnemonify_buf(i->instr_words);
+    printf("]");
+
+    /* the value returned by dcpu16_mnemonify_buf is expected to equal our word count */
+    if (i->length != decoded)
+        DEBUG_PRINTF("!!internal inconsistency!! i->length:%u l:%hu should match\n", i->length, decoded);
+
+    return width;
+}
+
+/* tokenize_line_
+ * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
+ * [label] instruction [operand[,operand[,...]]]
+ * Does no validation of contents of any of these tokens, as of yet.
+ * does not clean up after itself if a malloc fails
+ *
+ * line is modified in place.  Returns 0 on success, with *next_instr set to
+ * the new instruction, or to NULL when the line held nothing to assemble.
+ * Returns -1 on allocation failure or when an operand is empty.
+ * Leading and trailing whitespace is trimmed from each operand; whitespace
+ * inside an operand is preserved.
+ */
+static
+int tokenize_line_(char *line, struct instruction_ **next_instr) {
+    const char * const whitespace = " \t\n";
+    const char * const quotes = "\"'`";
+    struct instruction_ *instr = NULL;
+    char *x, *st, *qt;
+    char *label, *opcode;
+    struct operand_ *operand_list = NULL;
+    struct operand_ **operand_tail = &operand_list;
+    size_t instr_words_needed = 0;
+
+    assert(line);
+    assert(next_instr);
*next_instr = NULL;
- /* kill comments */
- if ((x = strchr(buf, ';')) != NULL)
- *x = '\0';
- /* kill leading whitespace */
- buf += strspn(buf, " \t\n");
- /* kill trailing newlines */
- if ((x = strrchr(buf, '\n')) != NULL)
- *x = '\0';
+    /* strip leading whitespace */
+    line += strspn(line, whitespace);
+    if (*line == '\0')
+        return 0;
- /* determine if first token is label, opcode, or we just have a blank line to ignore */
- x = strtok_r(buf, sep, &st);
+    /* set first bare ';' to '\0', thus isolating any comments */
+    /* here we only care about the side-effect of truncating the first separator character */
+    (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
+    /* we don't care if there was an unmatched quote at this point, let's see what happens */
+    if (*line == '\0')
+        return 0;
- /* empty line? nothing to do here. */
- if (x == NULL)
+    /* carve off the first token, determine if it is a label */
+    x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
+    if (x == NULL || *x == '\0')
return 0;
+    if (qt) {
+        /* labels could contain an unmatched quote character, I guess? */
+        qt = NULL;
+    }
+
+    /* we have something, try to make sense of what it is */
+
+#ifdef NON_SPEC_LABELS
+    /* I want my labels like 'label:' */
+    if ( *(x + strlen(line) - 1) == ':' ) {
+        *(x + strlen(line) - 1) = '\0';
+        DEBUG_PRINTF("label: %s\n", x);
- /* labels end with :, otherwise its an opcode */
- if ((y = strrchr(x, ':')) != NULL) {
- *y = '\0';
label = x;
- opcode = strtok_r(NULL, sep, &st);
+
+        opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
} else {
label = NULL;
opcode = x;
}
+#endif /* NON_SPEC_LABELS */
- if (opcode) {
- operand = st;
+    /* spec gives example of labels as ':label' */
+    if (*x == ':') {
+        *x = '\0';
+        x++;
+        label = x;
+        opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
+    } else {
+        label = NULL;
+        opcode = x;
+    }
+    /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
+
+    if (opcode && *opcode) {
+        /* if we have an opcode, we'll need at least one word to compile instruction */
+        instr_words_needed++;
+
+        /* build a list of operands to hang off this instruction */
+        while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
+            struct operand_ *new_operand;
+            char *y;
+
+            /* trim leading whitespace */
+            x += strspn(x, whitespace);
+
+            /* trim trailing whitespace only: stop at the first character that
+               is not whitespace, so text such as "[A + 1]" stays whole */
+            for (y = x + strlen(x); y > x && strchr(whitespace, *(y - 1)); y--)
+                ;
+            *y = '\0';
+
+            /* nothing left? */
+            if (*x == '\0') {
+                fprintf(stderr, "null operand encountered\n");
+                return -1;
+            }
+
+            DEBUG_PRINTF("tokenized operand '%s'\n", x);
+
+            new_operand = malloc(sizeof *new_operand);
+            if (new_operand == NULL) {
+                fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+                return -1;
+            }
+
+            new_operand->operand = strdup(x);
+            if (new_operand->operand == NULL) {
+                fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
+                return -1;
+            }
+
+            new_operand->next = NULL;
+
+            if (strchr(quotes, x[0])) {
+                /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
+                instr_words_needed += strlen(x) - 1;
+            }
+            instr_words_needed++;
+
+            *operand_tail = new_operand;
+            operand_tail = &(*operand_tail)->next;
+        }
}
- instr = calloc(1, sizeof *instr);
+    DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed);
+
+    instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
if (instr == NULL) {
fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
return -1;
}
- instr->label = label;
- instr->opcode = opcode;
+    if (label) {
+        instr->label = strdup(label);
+        if (instr->label == NULL) {
+            fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+            return -1;
+        }
+    } else {
+        label = NULL;
+    }
- if (operand) {
-
+    if (opcode) {
+        instr->opcode = strdup(opcode);
+        if (instr->opcode == NULL) {
+            fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+            return -1;
+        }
+    } else {
+        opcode = NULL;
}
+    instr->operands = operand_list;
+
*next_instr = instr;
return 0;
}
-/* thish should grow buffer to fit huge linesh, but I jusht don't care right now, hic */
+/* try to generate bytecode for an instruction */
+/* returns -1 on unrecoverable error */
static
-int parse_stream_(FILE *f) {
- struct instruction_ *instr;
- char buf[(1<<14)];
+int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
+    /*
+     * Fills i->instr_words and i->length from the tokenized opcode and
+     * operands.  i->ready is set only when no operand was left waiting on an
+     * unresolved label, so this may be called again for the same instruction
+     * once more labels are known.
+     * returns 0 when the instruction was processed (complete or deferred)
+     * returns -1 on unrecoverable error
+     */
+    unsigned int nwu = 0; /* number of words used */
+    unsigned int incomplete = 0;
+    int bits;
+    struct operand_ *o = i->operands;
+
+    if (opt_.verbose > 2) {
+        printf("%s: assembling %p ", __func__, (void *)i);
+        instruction_print_(i, 1);
+        printf("(line %zu)\n", i->src_line);
+    }
+
+    if (i->opcode == NULL) {
+        assert(i->label);
+        assert(i->operands == NULL);
+        /* just a label, move along */
+        i->length = 0;
+        i->ready = 1;
+        return 0;
+    }
+
+    /* special case DAT */
+    if (strncasecmp(i->opcode, "DAT", 3) == 0) {
+        DEBUG_PRINTF("processing DAT...\n");
+
+        i->length = 0;
+
+        for ( /* */ ; o; o = o->next) {
+            size_t j, dat_len;
+            char *x;
+            unsigned long l;
+
+            DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, (void *)o->next);
+
+            /* is this a string? */
+            /* does it start with a quote, and end with the same quote? */
+            if (o->operand[0] != '\0'
+            &&  (x = strchr("\"'`", o->operand[0])) ) {
+                dat_len = strlen(o->operand) - 1;
+                /* a lone quote character, or no matching closing quote, is not a string */
+                if (dat_len < 1 || o->operand[dat_len] != *x) {
+                    fprintf(stderr, "unterminated string in operand '%s'\n", o->operand);
+                    return -1;
+                }
+
+                /* it is a string */
+                DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
+
+                for (j = 0, x = o->operand + 1;
+                     j < dat_len - 1;
+                     j++, x++) {
+                    /* one word per character, without sign extension */
+                    i->instr_words[i->length] = (unsigned char)*x;
+                    i->length++;
+                }
+                /* Note that strings in DAT do not include their zero-terminators */
+                /* specify as 'DAT "string", 0' */
+                continue;
+            }
+
+            /* is this a number? */
+            char *ep;
+            errno = 0;
+            l = strtoul(o->operand, &ep, 0);
+            if (errno == 0
+            &&  (*o->operand && (*ep == '\0')) ) {
+                /* conversion succeeded */
+                if (l > 0xffff) {
+                    fprintf(stderr, "value '%lu' out of range\n", l);
+                    return -1;
+                }
+                i->instr_words[i->length] = l;
+                i->length++;
+                continue;
+            }
+
+            /* otherwise assume it's a label, even if we don't know what it is */
+            if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
+                DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand);
+                incomplete = 1;
+            }
+            i->length++;
+        }
+
+        if (incomplete) {
+            DEBUG_PRINTF("pending label address\n");
+        } else {
+            i->ready = 1;
+        }
+
+        return 0;
+    } /* end of DAT */
+
+    /* start with opcode bits */
+    bits = opcode_bits_(i->opcode);
+    if (bits < 0) {
+        fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
+        for (o = i->operands; o; o = o->next)
+            fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
+        fprintf(stderr, "'\n");
+        return -1;
+    }
+    /* the fields below are OR-ed in, so start from a clean word on every pass */
+    i->instr_words[0] = 0;
+    i->instr_words[0] |= bits & N_BIT_MASK(OPCODE_BITS);
+
+    /* in rendered bytecode, all instructions have a and b operands; nbi instructions occupy 'b operand' bits. */
+    if ((bits & N_BIT_MASK(OPCODE_BITS)) == 0) {
+        bits = nbi_opcode_bits_(i->opcode);
+        if (bits < 0) {
+            fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
+            exit(EX_SOFTWARE);
+        }
+    } else {
+        if (o == NULL) {
+            fprintf(stderr, "'%s' requires more operands\n", i->opcode);
+            return -1;
+        }
+        bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
+        if (bits == -1) {
+            fprintf(stderr, "couldn't assemble instruction\n");
+            return -1;
+        } else if (bits == -2) {
+            DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
+            /* keep going, but don't finalize until we can calculate label address */
+            incomplete = 1;
+            bits = 0;
+        }
+        o = o->next;
+    }
+    if (bits > N_BIT_MASK(OPERAND_B_BITS)) {
+        fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "b");
+        return -1;
+    }
+    i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_B_BITS)) << OPCODE_BITS;
+
+    if (o == NULL) {
+        fprintf(stderr, "'%s' requires more operands\n", i->opcode);
+        return -1;
+    }
+
+    bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
+    if (bits == -1) {
+        fprintf(stderr, "couldn't assemble instruction\n");
+        return -1;
+    } else if (bits == -2) {
+        DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
+        /* keep going, but don't finalize until we can calculate label address */
+        incomplete = 1;
+        bits = 0;
+    }
+    o = o->next;
+    if (bits > N_BIT_MASK(OPERAND_A_BITS)) {
+        fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "a");
+        /* treated as fatal, the same as for operand b */
+        return -1;
+    }
+    i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_A_BITS)) << (OPCODE_BITS + OPERAND_B_BITS);
+
+    if (o != NULL) {
+        fprintf(stderr, "too many operands\n");
+        return -1;
+    }
+
+    /* counting labels as words, we now know at least the maximum instruction length */
+
+    i->length = nwu + 1;
+
+    DEBUG_PRINTF("instruction words: [%u]", i->length);
+    for (bits = 0; bits <= (int)nwu; bits++)
+        DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
+
+    if (incomplete) {
+        DEBUG_PRINTFQ(" (preliminary)");
+    } else {
+        i->ready = 1;
+    }
+
+    DEBUG_PRINTFQ("\n");
+
+    return 0;
+}
+
+/* parse_stream_
+ * read lines from stream f
+ * break each line into parts, populate parts into structures
+ *
+ * Each tokenized line is appended to instructionps; a labelled line also adds
+ * an entry to labels and triggers a label address recalculation; then the
+ * instruction is assembled.  src names the input in diagnostics.
+ * Processing stops at the first error.
+ * returns 0 on success, -1 on any error (including a read error on f)
+ *
+ * NOTE(review): fgets only ever stores the terminator in the last byte of
+ * buf, so the "input line too long" check looks unable to fire and over-long
+ * lines would be read as several lines -- confirm.
+ */
+static
+int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
+    struct instruction_ *instr, **instr_list_entry;
+    unsigned int line = 0;
+    int retval = 0;
+    char buf[0x4000];
buf[sizeof buf - 1] = '\0';
while (fgets(buf, sizeof buf, f)) {
+        line++;
+
if (buf[sizeof buf - 1] != '\0') {
- fprintf(stderr, "input buffer exhausted\n");
+            fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
+            retval = -1;
break;
}
- if (buf_tokenize_(buf, &instr)) {
- fprintf(stderr, "trouble tokenizing input\n");
+        if (tokenize_line_(buf, &instr)) {
+            fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
+            retval = -1;
break;
}
if (instr) {
- struct operand_ *o;
- if (instr->label) {
- printf("TRACE: new label '%s'\n", instr->label);
+            instr->src_line = line;
+            /* add to list of instructions */
+            instr_list_entry = dynarray_add(instructionps, &instr);
+            if (instr_list_entry == NULL) {
+                fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
+                retval = -1;
+                break;
}
- printf("TRACE: tokenized opcode:%s operands:",
- instr->opcode);
- for (o = instr->operands; o; o = o->next) {
- printf("%s%s", o->operand, o->next ? ", " : "");
- }
- printf("\n");
+            if (instr->label) {
+                struct label_ new_label = {
+                    .label = instr->label,
+                    .instr = instr_list_entry,
+                    .ready = 0,
+                    .addr = 0,
+                };
+                if (label_find_(labels, instr->label)) {
+                    fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
+                    retval = -1;
+                    break;
+                }
+
+                if (dynarray_add(labels, &new_label) == NULL) {
+                    fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
+                    retval = -1;
+                    break;
+                }
+                label_addr_calculate_(instructionps, labels);
+            }
- /* add to queue of instructions */
+            if (instr_assemble_(labels, instr, allow_short_labels)) {
+                fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
+                /* without this the caller would see success and carry on */
+                retval = -1;
+                break;
+            }
}
}
if (ferror(f)) {
return -1;
}
+    return retval;
+}
+
+/* assemble_check_
+ * make a full pass over instruction list to resolve labels
+ *
+ * Re-assembles every instruction, then checks every label.  With verbose
+ * output enabled, also prints a table of label addresses.
+ * returns 0 if every instruction assembled and every label is ready
+ * returns non-zero otherwise, after reporting the reason on stderr
+ */
+static
+int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
+    int retval = 0;
+    size_t x;
+
+    /* fixing short labels .... */
+    /* by here we have our list of instructions and their maximum instruction lengths */
+    /* and we have a list of addresses, based on those maximum lengths */
+    /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
+    /* and reassemble.. */
+    /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
+    /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
+
+    /* try this? keep another list of locations a label address is used */
+    /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
+
+    DEBUG_PRINTF(" final pass of assembler...\n");
+    for (x = 0; x < instructionps->entries; x++) {
+        struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
+        retval = instr_assemble_(labels, *instrp, allow_short_labels);
+        if (retval) {
+            fprintf(stderr, "instruction %zu failed to assemble\n", x);
+            return retval;
+        }
+        if (! (*instrp)->ready) {
+            /* src_line is a size_t, so it needs %zu */
+            fprintf(stderr, "instruction not resolvable at line %zu\n", (*instrp)->src_line);
+            return -1;
+        }
+    }
+
+    VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
+    for (x = 0; x < labels->entries; x++) {
+        struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
+        if (! l->ready)
+            retval |= -1;
+        if (opt_.verbose) {
+            /* unresolved labels are flagged with a '*' */
+            printf("%3s0x%04x %-32s ",
+                   l->ready ? "" : "*",
+                   l->addr,
+                   l->label);
+            instruction_print_(*(l->instr), 0);
+            printf("\n");
+        }
+    }
+
+    VERBOSE_PRINTF("\n");
+
+    if (retval)
+        fprintf(stderr, "some labels could not be resolved\n");
+
+    return retval;
+}
+
+/* output_
+ * write assembled words to named file
+ *
+ * With dryrun set, no file is opened or written.  With verbose set, each
+ * instruction and its words are also listed on stdout.  A one-line summary
+ * goes to stderr on success.
+ * returns 0 on success, -1 if the file could not be opened, written or closed
+ */
+static
+int output_(struct dynamic_array *instructionps, const char *filename) {
+    FILE *of = NULL;
+    struct instruction_ **instrp;
+    size_t i, r, total_words = 0;
+    size_t x;
+
+    if (! opt_.dryrun) {
+        of = fopen(filename, "w");
+        if (of == NULL) {
+            fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
+            return -1;
+        }
+    }
+
+    for (i = 0; i < instructionps->entries; i++) {
+        instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
+
+        if (opt_.verbose) {
+            int s;
+            s = instruction_print_(*instrp, 1);
+            printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
+            for (x = 0; x < (*instrp)->length; x++) {
+                printf(" %04x", (*instrp)->instr_words[x]);
+            }
+            printf("\n");
+        }
+
+        if (of) {
+            r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
+            if (r < (*instrp)->length) {
+                fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
+                fclose(of);
+                return -1;
+            }
+        }
+        total_words += (*instrp)->length;
+    }
+
+    /* closing flushes buffered words, so a failure here means the file is incomplete */
+    if (of && fclose(of) != 0) {
+        fprintf(stderr, "%s('%s'):%s\n", "fclose", filename, strerror(errno));
+        return -1;
+    }
+
+    fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
+            opt_.dryrun ? "assembled" : "wrote",
+            i,
+            total_words);
+
return 0;
}
+static struct dynamic_array *instructionps_;
+static struct dynamic_array *labels_;
+
int main(int argc, char *argv[]) {
const char *out_filename = NULL;
+ unsigned int allow_short_labels = 0;
int c;
- while ( (c = getopt(argc, argv, "ho:")) != EOF ) {
+ while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
switch (c) {
+ case 'd':
+ opt_.dryrun++;
+ break;
+
+ case 's':
+ allow_short_labels++;
+ break;
+
case 'o':
if (out_filename) {
fprintf(stderr, "Sorry, I can only write one file at a time.\n");
out_filename = optarg;
break;
+ case 'v':
+ opt_.verbose++;
+ break;
+
case 'h':
usage_(argv[0], 1);
exit(EX_OK);
}
}
+ argc -= optind;
+ argv += optind;
+
if (out_filename == NULL)
out_filename = out_filename_default_;
+ /* init tables */
+ instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
+ labels_ = dynarray_new(sizeof(struct label_), 256);
+ if (instructionps_ == NULL
+ || labels_ == NULL) {
+ fprintf(stderr, "failed to initialize\n");
+ exit(EX_OSERR);
+ }
+
/* if filenames were specified, parse them instead of stdin */
- if (argc - optind) {
- while (argc - optind) {
- FILE *f = fopen(argv[argc - optind], "r");
+ if (argc) {
+ while (argc) {
+ char *filename = *argv;
+ FILE *f = fopen(filename, "r");
+
+ argc--, argv++;
+
if (f == NULL) {
- fprintf(stderr, "%s('%s'):%s\n", "fopen", argv[argc - optind], strerror(errno));
- optind++;
+ fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
continue;
}
- parse_stream_(f);
-
+ VERBOSE_PRINTF("assembling '%s'...\n", filename);
+ c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
fclose(f);
-
- optind++;
+ if (c)
+ break;
}
} else {
- parse_stream_(stdin);
+ VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
+ c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
+ }
+ if (c) {
+ fprintf(stderr, "could not parse input, aborting\n");
+ exit(EX_DATAERR);
+ }
+
+ if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
+ fprintf(stderr, "errors prevented assembly\n");
+ exit(EX_DATAERR);
+ }
+
+ if (output_(instructionps_, out_filename)) {
+ fprintf(stderr, "failed to create output\n");
+ exit(EX_OSERR);
}
exit(EX_OK);