X-Git-Url: http://git.squeep.com/?p=dcpu16;a=blobdiff_plain;f=as-dcpu16.c;h=37954aaf20b76fce9bb5410485c507f3e531f3fe;hp=91f63ee0fb1e571371452dbda8e9a976649abe6b;hb=HEAD;hpb=f4a3d9f3bf88f2b3840067674b3eed636516fcee diff --git a/as-dcpu16.c b/as-dcpu16.c index 91f63ee..37954aa 100644 --- a/as-dcpu16.c +++ b/as-dcpu16.c @@ -2,19 +2,48 @@ #include #include #include +#include #include #include #include -/* #include dcpu16.h */ -typedef unsigned short DCPU16_WORD; +#include "dcpu16.h" +#include "common.h" -/* quick and dirty assembler for dcpu16 */ +/* + * quick and dirty assembler for dcpu16 + * + * Justin Wind + * 2012 04 07 - implementation started + * 2012 04 10 - functional + * 2012 04 16 - support dat statements + * 2012 05 05 - v1.7 revision started + * 2012 05 08 - v1.7 revision implemented + * + * TODO + * needs ability to specify location for code or data + * needs ability to specify label as relative to another label + * short labels not correctly computed + * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly + */ static const char * const src_id_ = "$Id$"; const char const out_filename_default_[] = "a.out"; +/* global invocation options */ +struct options { + unsigned int verbose; + unsigned int dryrun; +} opt_ = { + .verbose = 0, + .dryrun = 0, +}; + +#define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0) +#define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0) +#define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0) + static void usage_(char *prog, unsigned int full) { FILE *f = full ? stdout : stderr; @@ -27,13 +56,16 @@ void usage_(char *prog, unsigned int full) { fprintf(f, "%s -- \n\n", prog); - fprintf(f, "Usage: %s\n", + fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n", prog); if (full) { fprintf(f, "\nOptions:\n" "\t-h -- this screen\n" - "\t-o -- output to [default: %s]\n", + "\t-o -- output to [default: %s]\n" + "\t-s -- allow short labels in instruction words\n" + "\t-d -- dry run, print results, do not write to file\n" + "\t-v -- verbose output\n", out_filename_default_); fprintf(f, "\n%78s\n", @@ -41,120 +73,955 @@ void usage_(char *prog, unsigned int full) { } } +/* LSB-0 aaaaaabbbbbooooo */ +#define OPCODE_BITS 5 +#define OPERAND_B_BITS 5 +#define OPERAND_A_BITS 6 +#define N_BIT_MASK(__x__) ((1 << (__x__)) - 1) + + +/* instructions have operands */ struct operand_ { struct operand_ *next; - char *operand; + char *operand; /* tokenized operand text */ }; +/* keep an array of instructions as we read them in */ struct instruction_ { - struct instruction_ *next; - char *label; - char *opcode; - struct operand_ *operands; - - unsigned int length; /* words */ + size_t src_line; + char *label; /* set if a label points here */ + char *opcode; /* tokenized instruction text */ + struct operand_ *operands; /* list of operands */ + unsigned int ready : 1; /* bytecode computed? */ + unsigned int length; /* number of words of bytecode */ DCPU16_WORD instr_words[]; }; -/* buf must be 0-terminated */ +/* keep an array of labels, indexed back to their instruction locations */ +struct label_ { + char *label; /* name of label */ + struct instruction_ **instr; /* pointer into array of instructions */ + unsigned int ready : 1; /* do we know where this label is yet? */ + DCPU16_WORD addr; +}; + + +/* locate and return the label entry matching name */ static -int buf_tokenize_(char *buf, struct instruction_ **next_instr) { - const char const *sep = " \t\n"; - struct instruction_ *instr = NULL; - char *label = NULL, - *opcode = NULL, - *operand = NULL; +struct label_ *label_find_(struct dynamic_array *labels, char *name) { + size_t x; + + for (x = 0; x < labels->entries; x++) { + struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x); + if (strcmp(l->label, name) == 0) + return l; + } + return NULL; +} + + +/* if a label has a validly-calculated address, fetch it */ +static +int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) { + struct label_ *l; + + if ( (l = label_find_(labels, name)) == NULL ) + return -1; + if (! l->ready) + return -2; + *addr = l->addr; + return 0; +} + + +/* attempt to determine the addresses of all labels */ +static +void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) { + size_t i; + + /* idea: label1:label2 - calculated as offset between labels */ + + /* for each label.. */ + for (i = 0; i < labels->entries; i++) { + struct label_ *l; + struct instruction_ **instr; + unsigned int word_count = 0; + + l = (struct label_ *)DYNARRAY_ITEM(*labels, i); + + DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label); + +#if 0 +force full resolution while debugging + /* if it's already calculated, great. */ + if (l->ready) + continue; +#endif + + /* + * starting at the instruction for this label, + * walk backwards through the list of instructions + * until we get to the start or a known prior label address. + * update our label with the freshly calculated addr + */ + + /* first fetch the instruction associated with the label we want to know about.. */ + /* the addr of this instruction will be whatever follows all the preceding instructions */ + /* so back up one before counting instruction lengths... */ + instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr; + /* is it the first one? */ + if (instr == (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) + break; + + instr--; + + while (instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) { + if ((*instr)->ready == 0) + DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode); + word_count += (*instr)->length; + + DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode, (*instr)->length); + + /* have we come across an instruction which a label points to? + it should already be calculated, so just add that on and be done */ + if ((*instr)->label + && strcmp((*instr)->label, l->label)) { + DCPU16_WORD addr; + + if (label_addr_(labels, (*instr)->label, &addr)) { + fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n", + (*instr)->label, + l->label); + continue; + } + + word_count += addr; + break; + } + instr--; + } + l->addr = word_count; + l->ready = 1; + DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count); + } +} + + +/* generate the nibble for a given basic opcode */ +static +int opcode_bits_(char *opcode) { + static struct { + char op[4]; + char value; + } opcodes_lower_nibble[] = { + { "JSR", 0x00 }, + { "INT", 0x00 }, + { "IAG", 0x00 }, + { "IAS", 0x00 }, + { "RFI", 0x00 }, + { "IAQ", 0x00 }, + { "HWN", 0x00 }, + { "HWQ", 0x00 }, + { "HWI", 0x00 }, + { "SET", 0x01 }, + { "ADD", 0x02 }, + { "SUB", 0x03 }, + { "MUL", 0x04 }, + { "MLI", 0x05 }, + { "DIV", 0x06 }, + { "DVI", 0x07 }, + { "MOD", 0x08 }, + { "MDI", 0x09 }, + { "AND", 0x0a }, + { "BOR", 0x0b }, + { "XOR", 0x0c }, + { "SHR", 0x0d }, + { "ASR", 0x0e }, + { "SHL", 0x0f }, + { "IFB", 0x10 }, + { "IFC", 0x11 }, + { "IFE", 0x12 }, + { "IFN", 0x13 }, + { "IFG", 0x14 }, + { "IFA", 0x15 }, + { "IFL", 0x16 }, + { "IFU", 0x17 }, + { "ADX", 0x1a }, + { "SBX", 0x1b }, + { "STI", 0x1e }, + { "SDI", 0x1f }, + { "", 0x00 } + }, *o; + + for (o = opcodes_lower_nibble; o->op[0]; o++) { + if (strcasecmp(o->op, opcode) == 0) + break; + } + + if (o->op[0] == '\0') { + fprintf(stderr, "unknown instruction '%s'\n", opcode); + return -1; + } + + return o->value; +} + +/* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */ +static +int nbi_opcode_bits_(char *nbi_opcode) { + static struct { + char op[4]; + char value; + } nbi_opcodes_bits[] = { + { " ", 0x00 }, /* reserved for future */ + { "JSR", 0x01 }, + { "INT", 0x08 }, + { "IAG", 0x09 }, + { "IAS", 0x0a }, + { "RFI", 0x0b }, + { "IAQ", 0x0c }, + { "HWN", 0x10 }, + { "HWQ", 0x11 }, + { "HWI", 0x12 }, + { "", 0x00 } + }, *o; + + for (o = nbi_opcodes_bits; o->op[0]; o++) { + if (strcasecmp(o->op, nbi_opcode) == 0) + break; + } + + if (o->op[0] == '\0') { + fprintf(stderr, "unknown nbi instruction '%s'\n", o->op); + return -1; + } + + return o->value; +} + +/* convert register character like 'x' to value like 0x03 */ +static inline +unsigned int register_enumerate_(char r) { + const char regs[] = "AaBbCcXxYyZzIiJj"; + const char *x = strchr(regs, r); + + if (x) + return (x - regs)/2; + + fprintf(stderr, "internal error, unknown register character 0x%02x\n", r); + return -1; +} + +/* removes all occurences of chars from buf */ +static inline +void buf_strip_chars_(char *buf, char *chars) { + char *s, *d; + + for (s = d = buf; *s; s++, d++) { + while (*s && strchr(chars, *s)) { + s++; + } + if (!*s) + break; + *d = *s; + } + *d = *s; +} + + +/* value_bits_ + * generate the six bits for a given operand string + * returns -1 if it could not parse the operand + * returns -2 if it could not parse the operand due to an unresolved label + * notes: nextword may be overwritten even if it's not used in final instruction + * + */ +static +int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) { + static char *operand = NULL; + static size_t operand_sz = 0; + + unsigned long l; + char *o, *ep; + + /* + Our operand working buffer shouldn't ever need to be too big, + but DAT might blow that assumption. + */ + if (operand_sz <= strlen(operand_orig)) { + void *tmp_ptr; + size_t new_sz = strlen(operand_orig); - char *x, - *y, - *st; + if (new_sz < 256) + new_sz = 256; + new_sz += 256; - assert(buf != NULL); - assert(next_instr != NULL); + DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz); + tmp_ptr = realloc(operand, new_sz); + if (tmp_ptr == NULL) { + fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno)); + return -1; + } + operand = tmp_ptr; + operand_sz = new_sz; + } + + o = strcpy(operand, operand_orig); + + DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */ + + /* this is a very stupid parser */ + + /* first, let's trim all whitespace out of string at once to make parsing easier */ + buf_strip_chars_(operand, " \t\n"); + + /* single character might match a register */ + if (strlen(operand) == 1 + && strchr("AaBbCcXxYyZzIiJj", *operand)) { + DEBUG_PRINTFQ("is register %c\n", *operand); + return register_enumerate_(*operand); + } + + /* easy matches */ + + /* push and pop now share the same operand value */ + if (strcasecmp(operand, "POP") == 0 + || strcasecmp(operand, "[SP++]") == 0) { + DEBUG_PRINTFQ("is POP\n"); + return 0x18; + } + if (strcasecmp(operand, "PUSH") == 0 + || strcasecmp(operand, "[--SP]") == 0) { + DEBUG_PRINTFQ("is PUSH\n"); + return 0x18; + } + + if (strcasecmp(operand, "PEEK") == 0 + || strcasecmp(operand, "[SP]") == 0) { + DEBUG_PRINTFQ("is PEEK\n"); + return 0x19; + } + + /* this could be better, if we had a real token tree */ + if (strncasecmp(operand, "PICK", 4) == 0) { + DEBUG_PRINTFQ("is PICK "); + + errno = 0; + l = strtoul(operand + 4, &ep, 0); + if (errno == 0 + && (*(operand + 4) && (*ep == '\0')) ) { + if (l > 0xffff) { + DEBUG_PRINTFQ("(out of range)\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + } else if (errno == ERANGE) { + DEBUG_PRINTFQ("(out of range)\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + *nextword = l & 0xffff; + *nextwordused += 1; + DEBUG_PRINTFQ("0x%04x\n", *nextword); + return 0x1a; + } + + if (strcasecmp(operand, "SP") == 0) { + DEBUG_PRINTFQ("is register SP\n"); + return 0x1b; + } + if (strcasecmp(operand, "PC") == 0) { + DEBUG_PRINTFQ("is register PC\n"); + return 0x1c; + } + if (strcasecmp(operand, "EX") == 0) { + DEBUG_PRINTFQ("is register EX\n"); + return 0x1d; + } + + /* is the operand [bracketed]? */ + if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') { + /* eat the brackets */ + operand[strlen(operand) - 1] = '\0'; + operand++; + + /* is it [register]? */ + if (strlen(operand) == 1 + && strchr("AaBbCcXxYyZzIiJj", *operand)) { + DEBUG_PRINTFQ("is dereferenced register %c\n", *operand); + return 0x08 | register_enumerate_(*operand); + } + + /* is it [register+something]? */ + if ( (ep = strchr(operand, '+')) ) { + char *reg; + char *constant; + + DEBUG_PRINTFQ("is multipart.. "); + + /* eat the plus */ + *ep = '\0'; + ep++; + + /* figure out which one is which */ + if ((strlen(ep) == 1 && strchr("AaBbCcXxYyZzIiJj", *ep)) + || (strlen(ep) == 2 && strcasecmp(ep, "SP")) ) { + reg = ep; + constant = operand; + } else if ((strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand)) + || (strlen(operand) == 2 && strcasecmp(operand, "SP")) ) { + reg = operand; + constant = ep; + } else { + DEBUG_PRINTFQ("is unparsable\n"); + fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig); + return -1; + } + + /* check if something is understandable as a value */ + errno = 0; + l = strtoul(constant, &ep, 0); + if (errno == 0 + && (*constant && (*ep == '\0')) ) { + /* string conversion went without issue */ + /* validate it will fit in a word */ + if (l > 0xffff) { + DEBUG_PRINTFQ("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + + /* seems fine */ + *nextword = l & 0xffff; + *nextwordused += 1; + + /* special case [SP+n]/PICK n */ + if (strlen(reg) == 2) { + DEBUG_PRINTFQ("is PICK 0x%04x\n", *nextword); + return 0x1a; + } + + DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword); + return 0x10 | register_enumerate_(*reg); + } else if (errno == ERANGE) { + fprintf(stderr, "%s('%s'):%s\n", "strtoul", constant, strerror(errno)); + } + + /* what? still here? assume it's a label, I guess */ + /* try to populate nextword with label address */ + if (label_addr_(labels, operand, nextword)) { + DEBUG_PRINTFQ("(deferred label resolution)\n"); + *nextwordused += 1; + return -2; + } + DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg); + *nextwordused += 1; + return 0x10 | register_enumerate_(*reg); + } + + /* it must just be a dereferenced literal then */ + + errno = 0; + l = strtoul(operand, &ep, 0); + if (errno == 0 + && (*operand && (*ep == '\0')) ) { + /* string conversion went without issue */ + /* validate it will fit in a word */ + if (l > 0xffff) { + DEBUG_PRINTFQ("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + + DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword); + *nextword = l & 0xffff; + *nextwordused += 1; + return 0x1e; + } else if (errno) { + /* if number wasn't parsable, just fall through and assume it's a label */ + } + + /* not a number? try a label */ + if (label_addr_(labels, operand, nextword)) { + DEBUG_PRINTFQ("(deferred label resolution)\n"); + *nextwordused += 1; + return -2; + } + DEBUG_PRINTFQ("is a dereferenced label\n"); + *nextwordused += 1; + return 0x1e; + } + + /* left with a literal or a label, then */ + + errno = 0; + l = strtoul(operand, &ep, 0); + if (errno == 0 + || (*operand && (*ep == '\0')) ) { + if (l > 0xffff) { + DEBUG_PRINTFQ("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + + DEBUG_PRINTFQ("is literal value (%lu)\n", l); + if (l < 0x1f) { + return l + 0x21; + } + if (l == 0xffff) { + return 0x20; + } + + *nextword = l & 0xffff; + *nextwordused += 1; + return 0x1f; + } + + /* try to populate nextword with label address */ + if (label_addr_(labels, operand, nextword)) { + DEBUG_PRINTFQ("(deferred label resolution)\n"); + /* assume non-small literal value */ + *nextwordused += 1; + return -2; + } + + DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword); + if (allow_short_labels + && (*nextword < 0x1f) ) { + DEBUG_PRINTF("small value label win\n"); + return (0x21 + *nextword) & N_BIT_MASK(OPERAND_A_BITS); + } + if (allow_short_labels + && (*nextword == 0xffff) ) { + DEBUG_PRINTF("small value label win\n"); + return 0x20; + } + + *nextwordused += 1; + return 0x1f; +} + +/* prints an instruction's assembly */ +static inline +int instruction_print_(struct instruction_ *i, unsigned int with_label) { + struct operand_ *o; + int r; + + if (with_label) + r = printf("%-16s ", i->label ? i->label : ""); + + r = printf("%3s", i->opcode ? i->opcode : ""); + + for (o = i->operands; o; o = o->next) + r += printf(" %s%s", o->operand, o->next ? "," : ""); + + if (i->ready) { + DCPU16_WORD l; + printf(" ["); + l = dcpu16_mnemonify_buf(i->instr_words); + printf("]"); + + if (i->length != l) + DEBUG_PRINTF("!!internal inconsistency!! i->length:%u l:%hu should match\n", i->length, l); + } + return r; +} + +/* tokenize_line_ + * Parses a zero-terminated line of input into a newly-allocated struct instruction_. + * [label] instruction [operand[,operand[,...]]] + * Does no validation of contents of any of these tokens, as of yet. + * does not clean up after itself if a malloc fails + */ +static +int tokenize_line_(char *line, struct instruction_ **next_instr) { + const char const *whitespace = " \t\n"; + const char const *quotes = "\"'`"; + struct instruction_ *instr = NULL; + char *x, *st, *qt; + char *label, *opcode; + struct operand_ *operand_list = NULL; + struct operand_ **operand_tail = &operand_list; + size_t instr_words_needed = 0; + + assert(line); + assert(next_instr); *next_instr = NULL; - /* kill comments */ - if ((x = strchr(buf, ';')) != NULL) - *x = '\0'; - /* kill leading whitespace */ - buf += strspn(buf, " \t\n"); - /* kill trailing newlines */ - if ((x = strrchr(buf, '\n')) != NULL) - *x = '\0'; + /* strip leading whitespace */ + line += strspn(line, whitespace); + if (*line == '\0') + return 0; - /* determine if first token is label, opcode, or we just have a blank line to ignore */ - x = strtok_r(buf, sep, &st); + /* set first bare ';' to '\0', thus isolating any comments */ + /* here we only care about the side-effect of truncating the first separator character */ + (void)strqtok_r(line, ";", '\\', quotes, &qt, &st); + /* we don't care if there was an unmatched quote at this point, let's see what happens */ + if (*line == '\0') + return 0; - /* empty line? nothing to do here. */ - if (x == NULL) + /* carve off the first token, determine if it is a label */ + x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st); + if (x == NULL || *x == '\0') return 0; + if (qt) { + /* labels could contain an unmatched quote character, I guess? */ + qt = NULL; + } + + /* we have something, try to make sense of what it is */ + +#ifdef NON_SPEC_LABELS + /* I want my labels like 'label:' */ + if ( *(x + strlen(line) - 1) == ':' ) { + *(x + strlen(line) - 1) = '\0'; + DEBUG_PRINTF("label: %s\n", x); - /* labels end with :, otherwise its an opcode */ - if ((y = strrchr(x, ':')) != NULL) { - *y = '\0'; label = x; - opcode = strtok_r(NULL, sep, &st); + + opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); } else { label = NULL; opcode = x; } +#endif /* NON_SPEC_LABELS */ - if (opcode) { - operand = st; + /* spec gives example of labels as ':label' */ + if (*x == ':') { + *x = '\0'; + x++; + label = x; + opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); + } else { + label = NULL; + opcode = x; + } + /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */ + + if (opcode && *opcode) { + /* if we have an opcode, we'll need at least one word to compile instruction */ + instr_words_needed++; + + /* build a list of operands to hang off this instruction */ + while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) { + struct operand_ *new_operand; + char *y; + + /* trim whitespaces */ + x += strspn(x, whitespace); + + if (*x) { + for (y = x + strlen(x) - 1; *y; y--) { + if (strchr(whitespace, *y)) { + *y = '\0'; + } + } + } + /* nothing left? */ + if (*x == '\0') { + fprintf(stderr, "null operand encountered\n"); + return -1; + } + + DEBUG_PRINTF("tokenized operand '%s'\n", x); + + new_operand = malloc(sizeof *new_operand); + if (new_operand == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); + return -1; + } + + new_operand->operand = strdup(x); + if (new_operand->operand == NULL) { + fprintf(stderr, "%s():%s\n", "strdup", strerror(errno)); + return -1; + } + + new_operand->next = NULL; + + if (strchr(quotes, x[0])) { + /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */ + instr_words_needed += strlen(x) - 1; + } + instr_words_needed++; + + *operand_tail = new_operand; + operand_tail = &(*operand_tail)->next; + } } - instr = calloc(1, sizeof *instr); + DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed); + + instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr); if (instr == NULL) { fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } - instr->label = label; - instr->opcode = opcode; + if (label) { + instr->label = strdup(label); + if (instr->label == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); + return -1; + } + } else { + label = NULL; + } - if (operand) { - + if (opcode) { + instr->opcode = strdup(opcode); + if (instr->opcode == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); + return -1; + } + } else { + opcode = NULL; } + instr->operands = operand_list; + *next_instr = instr; return 0; } -/* thish should grow buffer to fit huge linesh, but I jusht don't care right now, hic */ +/* try to generate bytecode for an instruction */ +/* returns -1 on unrecoverable error */ static -int parse_stream_(FILE *f) { - struct instruction_ *instr; - char buf[(1<<14)]; +int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) { + unsigned int nwu = 0; /* number of words used */ + unsigned int incomplete = 0; + int bits; + struct operand_ *o = i->operands; + + if (opt_.verbose > 2) { + printf("%s: assembling %p ", __func__, (void *)i); + instruction_print_(i, 1); + printf("(line %zu)\n", i->src_line); + } + + if (i->opcode == NULL) { + assert(i->label); + assert(i->operands == NULL); + /* just a label, move along */ + i->length = 0; + i->ready = 1; + return 0; + } + + /* special case DAT */ + if (strncasecmp(i->opcode, "DAT", 3) == 0) { + DEBUG_PRINTF("processing DAT...\n"); + + i->length = 0; + + for ( /* */ ; o; o = o->next) { + size_t j, dat_len; + char *x; + unsigned long l; + + DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, (void *)o->next); + + /* is this a string? */ + /* does it start with a quote, and end with the same quote? */ + if ( (x = strchr("\"'`", o->operand[0])) ) { + dat_len = strlen(o->operand) - 1; + if (o->operand[dat_len] == *x) { + /* it is a string */ + DEBUG_PRINTF("DAT string operand: %s\n", o->operand); + + for (j = 0, x = o->operand + 1; + j < dat_len - 1; + j++, x++) { + i->instr_words[i->length] = *x; + i->length++; + } + /* Note that strings in DAT do not include their zero-terminators */ + /* specify as 'DAT "string", 0' */ + } + continue; + } + + /* is this a number? */ + char *ep; + errno = 0; + l = strtoul(o->operand, &ep, 0); + if (errno == 0 + && (*o->operand && (*ep == '\0')) ) { + /* conversion succeeded */ + if (l > 0xffff) { + fprintf(stderr, "value '%lu' out of range\n", l); + return -1; + } + i->instr_words[i->length] = l; + i->length++; + continue; + } + + /* otherwise assume it's a label, even if we don't know what it is */ + if (label_addr_(labels, o->operand, &i->instr_words[i->length])) { + DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand); + incomplete = 1; + } + i->length++; + } + + if (incomplete) { + DEBUG_PRINTF("pending label address\n"); + } else { + i->ready = 1; + } + + return 0; + } /* end of DAT */ + + /* start with opcode bits */ + bits = opcode_bits_(i->opcode); + if (bits < 0) { + fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : ""); + for (o = i->operands; o; o = o->next) + fprintf(stderr, " %s%s", o->operand, o->next ? "," : ""); + fprintf(stderr, "'\n"); + return -1; + } + i->instr_words[0] |= bits & N_BIT_MASK(OPCODE_BITS); + + /* in rendered bytecode, all instructions have a and b operands; nbi instructions occupy 'b operand' bits. */ + if ((bits & N_BIT_MASK(OPCODE_BITS)) == 0) { + bits = nbi_opcode_bits_(i->opcode); + if (bits < 0) { + fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n"); + exit(EX_SOFTWARE); + } + } else { + if (o == NULL) { + fprintf(stderr, "'%s' requires more operands\n", i->opcode); + return -1; + } + bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels); + if (bits == -1) { + fprintf(stderr, "couldn't assemble instruction\n"); + return -1; + } else if (bits == -2) { + DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); + /* keep going, but don't finalize until we can calculate label address */ + incomplete = 1; + bits = 0; + } + o = o->next; + } + if (bits > N_BIT_MASK(OPERAND_B_BITS)) { + fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "b"); + return -1; + } + i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_B_BITS)) << OPCODE_BITS; + + if (o == NULL) { + fprintf(stderr, "'%s' requires more operands\n", i->opcode); + return -1; + } + + bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels); + if (bits == -1) { + fprintf(stderr, "couldn't assemble instruction\n"); + return -1; + } else if (bits == -2) { + DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); + /* keep going, but don't finalize until we can calculate label address */ + incomplete = 1; + bits = 0; + } + o = o->next; + if (bits > N_BIT_MASK(OPERAND_A_BITS)) { + fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "a"); + } + i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_A_BITS)) << (OPCODE_BITS + OPERAND_B_BITS); + + if (o != NULL) { + fprintf(stderr, "too many operands\n"); + return -1; + } + + /* counting labels as words, we now know at least the maximum instruction length */ + + i->length = nwu + 1; + + DEBUG_PRINTF("instruction words: [%u]", i->length); + for (bits = 0; bits <= (int)nwu; bits++) + DEBUG_PRINTFQ(" %04x", i->instr_words[bits]); + + if (incomplete) { + DEBUG_PRINTFQ(" (preliminary)"); + } else { + i->ready = 1; + } + + DEBUG_PRINTFQ("\n"); + + return 0; +} + +/* parse_stream_ + * read lines from stream f + * break each line into parts, populate parts into structures + */ +static +int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) { + struct instruction_ *instr, **instr_list_entry; + unsigned int line = 0; + int retval = 0; + char buf[0x4000]; buf[sizeof buf - 1] = '\0'; while (fgets(buf, sizeof buf, f)) { + line++; + if (buf[sizeof buf - 1] != '\0') { - fprintf(stderr, "input buffer exhausted\n"); + fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n"); + retval = -1; break; } - if (buf_tokenize_(buf, &instr)) { - fprintf(stderr, "trouble tokenizing input\n"); + if (tokenize_line_(buf, &instr)) { + fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n"); + retval = -1; break; } if (instr) { - struct operand_ *o; - if (instr->label) { - printf("TRACE: new label '%s'\n", instr->label); + instr->src_line = line; + /* add to list of instructions */ + instr_list_entry = dynarray_add(instructionps, &instr); + if (instr_list_entry == NULL) { + fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n"); + break; } - printf("TRACE: tokenized opcode:%s operands:", - instr->opcode); - for (o = instr->operands; o; o = o->next) { - printf("%s%s", o->operand, o->next ? ", " : ""); - } - printf("\n"); + if (instr->label) { + struct label_ new_label = { + .label = instr->label, + .instr = instr_list_entry, + .ready = 0, + .addr = 0, + }; + if (label_find_(labels, instr->label)) { + fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n"); + break; + } + + if (dynarray_add(labels, &new_label) == NULL) { + fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n"); + break; + } + label_addr_calculate_(instructionps, labels); + } - /* add to queue of instructions */ + if (instr_assemble_(labels, instr, allow_short_labels)) { + fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n"); + break; + } } } if (ferror(f)) { @@ -166,15 +1033,132 @@ int parse_stream_(FILE *f) { return -1; } + return retval; +} + +/* assemble_check_ + * make a full pass over instruction list to resolve labels + */ +static +int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) { + int retval = 0; + size_t x; + + /* fixing short labels .... */ + /* by here we have our list of instructions and their maximum instruction lengths */ + /* and we have a list of addresses, based on those maximum lengths */ + /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */ + /* and reassemble.. */ + /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */ + /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */ + + /* try this? keep another list of locations a label address is used */ + /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */ + + DEBUG_PRINTF(" final pass of assembler...\n"); + for (x = 0; x < instructionps->entries; x++) { + struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x); + retval = instr_assemble_(labels, *instrp, allow_short_labels); + if (retval) { + fprintf(stderr, "instruction %zu failed to assemble\n", x); + return retval; + } + if (! (*instrp)->ready) { + fprintf(stderr, "instruction not resolvable at line %lu\n", (*instrp)->src_line); + return -1; + } + } + + VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_"); + for (x = 0; x < labels->entries; x++) { + struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x); + if (! l->ready) + retval |= -1; + if (opt_.verbose) { + printf("%3s0x%04x %-32s ", + l->ready ? "" : "*", + l->addr, + l->label); + instruction_print_(*(l->instr), 0); + printf("\n"); + } + } + + VERBOSE_PRINTF("\n"); + + if (retval) + fprintf(stderr, "some labels could not be resolved\n"); + + return retval; +} + +/* output_ + * write assembled words to named file + */ +static +int output_(struct dynamic_array *instructionps, const char *filename) { + FILE *of = NULL; + struct instruction_ **instrp; + size_t i, r, total_words = 0; + size_t x; + + if (! opt_.dryrun) { + of = fopen(filename, "w"); + if (of == NULL) { + fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno)); + return -1; + } + } + + for (i = 0; i < instructionps->entries; i++) { + instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i); + + if (opt_.verbose) { + int s; + s = instruction_print_(*instrp, 1); + printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, ""); + for (x = 0; x < (*instrp)->length; x++) { + printf(" %04x", (*instrp)->instr_words[x]); + } + printf("\n"); + } + + if (of) { + r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of); + if (r < (*instrp)->length) { + fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno)); + return -1; + } + } + total_words += (*instrp)->length; + } + + fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n", + opt_.dryrun ? "assembled" : "wrote", + i, + total_words); + return 0; } +static struct dynamic_array *instructionps_; +static struct dynamic_array *labels_; + int main(int argc, char *argv[]) { const char *out_filename = NULL; + unsigned int allow_short_labels = 0; int c; - while ( (c = getopt(argc, argv, "ho:")) != EOF ) { + while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) { switch (c) { + case 'd': + opt_.dryrun++; + break; + + case 's': + allow_short_labels++; + break; + case 'o': if (out_filename) { fprintf(stderr, "Sorry, I can only write one file at a time.\n"); @@ -183,6 +1167,10 @@ int main(int argc, char *argv[]) { out_filename = optarg; break; + case 'v': + opt_.verbose++; + break; + case 'h': usage_(argv[0], 1); exit(EX_OK); @@ -193,27 +1181,57 @@ int main(int argc, char *argv[]) { } } + argc -= optind; + argv += optind; + if (out_filename == NULL) out_filename = out_filename_default_; + /* init tables */ + instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024); + labels_ = dynarray_new(sizeof(struct label_), 256); + if (instructionps_ == NULL + || labels_ == NULL) { + fprintf(stderr, "failed to initialize\n"); + exit(EX_OSERR); + } + /* if filenames were specified, parse them instead of stdin */ - if (argc - optind) { - while (argc - optind) { - FILE *f = fopen(argv[argc - optind], "r"); + if (argc) { + while (argc) { + char *filename = *argv; + FILE *f = fopen(filename, "r"); + + argc--, argv++; + if (f == NULL) { - fprintf(stderr, "%s('%s'):%s\n", "fopen", argv[argc - optind], strerror(errno)); - optind++; + fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno)); continue; } - parse_stream_(f); - + VERBOSE_PRINTF("assembling '%s'...\n", filename); + c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); fclose(f); - - optind++; + if (c) + break; } } else { - parse_stream_(stdin); + VERBOSE_PRINTF("assembling '%s'...\n", "stdin"); + c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); + } + if (c) { + fprintf(stderr, "could not parse input, aborting\n"); + exit(EX_DATAERR); + } + + if (assemble_check_(instructionps_, labels_, allow_short_labels)) { + fprintf(stderr, "errors prevented assembly\n"); + exit(EX_DATAERR); + } + + if (output_(instructionps_, out_filename)) { + fprintf(stderr, "failed to create output\n"); + exit(EX_OSERR); } exit(EX_OK);