#include #include #include #include #include #include #include #include #include "dcpu16.h" #include "common.h" /* * quick and dirty assembler for dcpu16 * * Justin Wind * 2012 04 07 - implementation started * 2012 04 10 - functional * 2012 04 16 - support dat statements * 2012 05 05 - v1.7 revision started * 2012 05 08 - v1.7 revision implemented * * TODO * needs ability to specify location for code or data * needs ability to specify label as relative to another label * short labels not correctly computed * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly */ static const char * const src_id_ = "$Id$"; const char const out_filename_default_[] = "a.out"; /* global invocation options */ struct options { unsigned int verbose; unsigned int dryrun; } opt_ = { .verbose = 0, .dryrun = 0, }; #define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0) #define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0) #define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0) static void usage_(char *prog, unsigned int full) { FILE *f = full ? stdout : stderr; char *x = strrchr(prog, '/'); if (x && *(x + 1)) prog = x + 1; if (full) fprintf(f, "%s -- \n\n", prog); fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n", prog); if (full) { fprintf(f, "\nOptions:\n" "\t-h -- this screen\n" "\t-o -- output to [default: %s]\n" "\t-s -- allow short labels in instruction words\n" "\t-d -- dry run, print results, do not write to file\n" "\t-v -- verbose output\n", out_filename_default_); fprintf(f, "\n%78s\n", src_id_); } } /* LSB-0 aaaaaabbbbbooooo */ #define OPCODE_BITS 5 #define OPERAND_B_BITS 5 #define OPERAND_A_BITS 6 #define N_BIT_MASK(__x__) ((1 << (__x__)) - 1) /* instructions have operands */ struct operand_ { struct operand_ *next; char *operand; /* tokenized operand text */ }; /* keep an array of instructions as we read them in */ struct instruction_ { size_t src_line; char *label; /* set if a label points here */ char *opcode; /* tokenized instruction text */ struct operand_ *operands; /* list of operands */ unsigned int ready : 1; /* bytecode computed? */ unsigned int length; /* number of words of bytecode */ DCPU16_WORD instr_words[]; }; /* keep an array of labels, indexed back to their instruction locations */ struct label_ { char *label; /* name of label */ struct instruction_ **instr; /* pointer into array of instructions */ unsigned int ready : 1; /* do we know where this label is yet? */ DCPU16_WORD addr; }; /* locate and return the label entry matching name */ static struct label_ *label_find_(struct dynamic_array *labels, char *name) { size_t x; for (x = 0; x < labels->entries; x++) { struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x); if (strcmp(l->label, name) == 0) return l; } return NULL; } /* if a label has a validly-calculated address, fetch it */ static int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) { struct label_ *l; if ( (l = label_find_(labels, name)) == NULL ) return -1; if (! l->ready) return -2; *addr = l->addr; return 0; } /* attempt to determine the addresses of all labels */ static void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) { size_t i; /* idea: label1:label2 - calculated as offset between labels */ /* for each label.. */ for (i = 0; i < labels->entries; i++) { struct label_ *l; struct instruction_ **instr; unsigned int word_count = 0; l = (struct label_ *)DYNARRAY_ITEM(*labels, i); DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label); #if 0 force full resolution while debugging /* if it's already calculated, great. */ if (l->ready) continue; #endif /* * starting at the instruction for this label, * walk backwards through the list of instructions * until we get to the start or a known prior label address. * update our label with the freshly calculated addr */ /* first fetch the instruction associated with the label we want to know about.. */ /* the addr of this instruction will be whatever follows all the preceding instructions */ /* so back up one before counting instruction lengths... */ instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr; /* is it the first one? */ if (instr == (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) break; instr--; while (instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) { if ((*instr)->ready == 0) DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode); word_count += (*instr)->length; DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode, (*instr)->length); /* have we come across an instruction which a label points to? it should already be calculated, so just add that on and be done */ if ((*instr)->label && strcmp((*instr)->label, l->label)) { DCPU16_WORD addr; if (label_addr_(labels, (*instr)->label, &addr)) { fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n", (*instr)->label, l->label); continue; } word_count += addr; break; } instr--; } l->addr = word_count; l->ready = 1; DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count); } } /* generate the nibble for a given basic opcode */ static int opcode_bits_(char *opcode) { static struct { char op[4]; char value; } opcodes_lower_nibble[] = { { "JSR", 0x00 }, { "INT", 0x00 }, { "IAG", 0x00 }, { "IAS", 0x00 }, { "RFI", 0x00 }, { "IAQ", 0x00 }, { "HWN", 0x00 }, { "HWQ", 0x00 }, { "HWI", 0x00 }, { "SET", 0x01 }, { "ADD", 0x02 }, { "SUB", 0x03 }, { "MUL", 0x04 }, { "MLI", 0x05 }, { "DIV", 0x06 }, { "DVI", 0x07 }, { "MOD", 0x08 }, { "MDI", 0x09 }, { "AND", 0x0a }, { "BOR", 0x0b }, { "XOR", 0x0c }, { "SHR", 0x0d }, { "ASR", 0x0e }, { "SHL", 0x0f }, { "IFB", 0x10 }, { "IFC", 0x11 }, { "IFE", 0x12 }, { "IFN", 0x13 }, { "IFG", 0x14 }, { "IFA", 0x15 }, { "IFL", 0x16 }, { "IFU", 0x17 }, { "ADX", 0x1a }, { "SBX", 0x1b }, { "STI", 0x1e }, { "SDI", 0x1f }, { "", 0x00 } }, *o; for (o = opcodes_lower_nibble; o->op[0]; o++) { if (strcasecmp(o->op, opcode) == 0) break; } if (o->op[0] == '\0') { fprintf(stderr, "unknown instruction '%s'\n", opcode); return -1; } return o->value; } /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */ static int nbi_opcode_bits_(char *nbi_opcode) { static struct { char op[4]; char value; } nbi_opcodes_bits[] = { { " ", 0x00 }, /* reserved for future */ { "JSR", 0x01 }, { "INT", 0x08 }, { "IAG", 0x09 }, { "IAS", 0x0a }, { "RFI", 0x0b }, { "IAQ", 0x0c }, { "HWN", 0x10 }, { "HWQ", 0x11 }, { "HWI", 0x12 }, { "", 0x00 } }, *o; for (o = nbi_opcodes_bits; o->op[0]; o++) { if (strcasecmp(o->op, nbi_opcode) == 0) break; } if (o->op[0] == '\0') { fprintf(stderr, "unknown nbi instruction '%s'\n", o->op); return -1; } return o->value; } /* convert register character like 'x' to value like 0x03 */ static inline unsigned int register_enumerate_(char r) { const char regs[] = "AaBbCcXxYyZzIiJj"; const char *x = strchr(regs, r); if (x) return (x - regs)/2; fprintf(stderr, "internal error, unknown register character 0x%02x\n", r); return -1; } /* removes all occurences of chars from buf */ static inline void buf_strip_chars_(char *buf, char *chars) { char *s, *d; for (s = d = buf; *s; s++, d++) { while (*s && strchr(chars, *s)) { s++; } if (!*s) break; *d = *s; } *d = *s; } /* value_bits_ * generate the six bits for a given operand string * returns -1 if it could not parse the operand * returns -2 if it could not parse the operand due to an unresolved label * notes: nextword may be overwritten even if it's not used in final instruction * */ static int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) { static char *operand = NULL; static size_t operand_sz = 0; unsigned long l; char *o, *ep; /* Our operand working buffer shouldn't ever need to be too big, but DAT might blow that assumption. */ if (operand_sz <= strlen(operand_orig)) { void *tmp_ptr; size_t new_sz = strlen(operand_orig); if (new_sz < 256) new_sz = 256; new_sz += 256; DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz); tmp_ptr = realloc(operand, new_sz); if (tmp_ptr == NULL) { fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno)); return -1; } operand = tmp_ptr; operand_sz = new_sz; } o = strcpy(operand, operand_orig); DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */ /* this is a very stupid parser */ /* first, let's trim all whitespace out of string at once to make parsing easier */ buf_strip_chars_(operand, " \t\n"); /* single character might match a register */ if (strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand)) { DEBUG_PRINTFQ("is register %c\n", *operand); return register_enumerate_(*operand); } /* easy matches */ /* push and pop now share the same operand value */ if (strcasecmp(operand, "POP") == 0 || strcasecmp(operand, "[SP++]") == 0) { DEBUG_PRINTFQ("is POP\n"); return 0x18; } if (strcasecmp(operand, "PUSH") == 0 || strcasecmp(operand, "[--SP]") == 0) { DEBUG_PRINTFQ("is PUSH\n"); return 0x18; } if (strcasecmp(operand, "PEEK") == 0 || strcasecmp(operand, "[SP]") == 0) { DEBUG_PRINTFQ("is PEEK\n"); return 0x19; } /* this could be better, if we had a real token tree */ if (strncasecmp(operand, "PICK", 4) == 0) { DEBUG_PRINTFQ("is PICK "); errno = 0; l = strtoul(operand + 4, &ep, 0); if (errno == 0 && (*(operand + 4) && (*ep == '\0')) ) { if (l > 0xffff) { DEBUG_PRINTFQ("(out of range)\n"); fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); return -1; } } else if (errno == ERANGE) { DEBUG_PRINTFQ("(out of range)\n"); fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); return -1; } *nextword = l & 0xffff; *nextwordused += 1; DEBUG_PRINTFQ("0x%04x\n", *nextword); return 0x1a; } if (strcasecmp(operand, "SP") == 0) { DEBUG_PRINTFQ("is register SP\n"); return 0x1b; } if (strcasecmp(operand, "PC") == 0) { DEBUG_PRINTFQ("is register PC\n"); return 0x1c; } if (strcasecmp(operand, "EX") == 0) { DEBUG_PRINTFQ("is register EX\n"); return 0x1d; } /* is the operand [bracketed]? */ if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') { /* eat the brackets */ operand[strlen(operand) - 1] = '\0'; operand++; /* is it [register]? */ if (strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand)) { DEBUG_PRINTFQ("is dereferenced register %c\n", *operand); return 0x08 | register_enumerate_(*operand); } /* is it [register+something]? */ if ( (ep = strchr(operand, '+')) ) { char *reg; char *constant; DEBUG_PRINTFQ("is multipart.. "); /* eat the plus */ *ep = '\0'; ep++; /* figure out which one is which */ if ((strlen(ep) == 1 && strchr("AaBbCcXxYyZzIiJj", *ep)) || (strlen(ep) == 2 && strcasecmp(ep, "SP")) ) { reg = ep; constant = operand; } else if ((strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand)) || (strlen(operand) == 2 && strcasecmp(operand, "SP")) ) { reg = operand; constant = ep; } else { DEBUG_PRINTFQ("is unparsable\n"); fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig); return -1; } /* check if something is understandable as a value */ errno = 0; l = strtoul(constant, &ep, 0); if (errno == 0 && (*constant && (*ep == '\0')) ) { /* string conversion went without issue */ /* validate it will fit in a word */ if (l > 0xffff) { DEBUG_PRINTFQ("is out of range\n"); fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); return -1; } /* seems fine */ *nextword = l & 0xffff; *nextwordused += 1; /* special case [SP+n]/PICK n */ if (strlen(reg) == 2) { DEBUG_PRINTFQ("is PICK 0x%04x\n", *nextword); return 0x1a; } DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword); return 0x10 | register_enumerate_(*reg); } else if (errno == ERANGE) { fprintf(stderr, "%s('%s'):%s\n", "strtoul", constant, strerror(errno)); } /* what? still here? assume it's a label, I guess */ /* try to populate nextword with label address */ if (label_addr_(labels, operand, nextword)) { DEBUG_PRINTFQ("(deferred label resolution)\n"); *nextwordused += 1; return -2; } DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg); *nextwordused += 1; return 0x10 | register_enumerate_(*reg); } /* it must just be a dereferenced literal then */ errno = 0; l = strtoul(operand, &ep, 0); if (errno == 0 && (*operand && (*ep == '\0')) ) { /* string conversion went without issue */ /* validate it will fit in a word */ if (l > 0xffff) { DEBUG_PRINTFQ("is out of range\n"); fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); return -1; } DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword); *nextword = l & 0xffff; *nextwordused += 1; return 0x1e; } else if (errno) { /* if number wasn't parsable, just fall through and assume it's a label */ } /* not a number? try a label */ if (label_addr_(labels, operand, nextword)) { DEBUG_PRINTFQ("(deferred label resolution)\n"); *nextwordused += 1; return -2; } DEBUG_PRINTFQ("is a dereferenced label\n"); *nextwordused += 1; return 0x1e; } /* left with a literal or a label, then */ errno = 0; l = strtoul(operand, &ep, 0); if (errno == 0 || (*operand && (*ep == '\0')) ) { if (l > 0xffff) { DEBUG_PRINTFQ("is out of range\n"); fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); return -1; } DEBUG_PRINTFQ("is literal value (%lu)\n", l); if (l < 0x1f) { return l + 0x21; } if (l == 0xffff) { return 0x20; } *nextword = l & 0xffff; *nextwordused += 1; return 0x1f; } /* try to populate nextword with label address */ if (label_addr_(labels, operand, nextword)) { DEBUG_PRINTFQ("(deferred label resolution)\n"); /* assume non-small literal value */ *nextwordused += 1; return -2; } DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword); if (allow_short_labels && (*nextword < 0x1f) ) { DEBUG_PRINTF("small value label win\n"); return (0x21 + *nextword) & N_BIT_MASK(OPERAND_A_BITS); } if (allow_short_labels && (*nextword == 0xffff) ) { DEBUG_PRINTF("small value label win\n"); return 0x20; } *nextwordused += 1; return 0x1f; } /* prints an instruction's assembly */ static inline int instruction_print_(struct instruction_ *i, unsigned int with_label) { struct operand_ *o; int r; if (with_label) r = printf("%-16s ", i->label ? i->label : ""); r = printf("%3s", i->opcode ? i->opcode : ""); for (o = i->operands; o; o = o->next) r += printf(" %s%s", o->operand, o->next ? "," : ""); if (i->ready) { DCPU16_WORD l; printf(" ["); l = dcpu16_mnemonify_buf(i->instr_words); printf("]"); if (i->length != l) DEBUG_PRINTF("!!internal inconsistency!! i->length:%u l:%hu should match\n", i->length, l); } return r; } /* tokenize_line_ * Parses a zero-terminated line of input into a newly-allocated struct instruction_. * [label] instruction [operand[,operand[,...]]] * Does no validation of contents of any of these tokens, as of yet. * does not clean up after itself if a malloc fails */ static int tokenize_line_(char *line, struct instruction_ **next_instr) { const char const *whitespace = " \t\n"; const char const *quotes = "\"'`"; struct instruction_ *instr = NULL; char *x, *st, *qt; char *label, *opcode; struct operand_ *operand_list = NULL; struct operand_ **operand_tail = &operand_list; size_t instr_words_needed = 0; assert(line); assert(next_instr); *next_instr = NULL; /* strip leading whitespace */ line += strspn(line, whitespace); if (*line == '\0') return 0; /* set first bare ';' to '\0', thus isolating any comments */ /* here we only care about the side-effect of truncating the first separator character */ (void)strqtok_r(line, ";", '\\', quotes, &qt, &st); /* we don't care if there was an unmatched quote at this point, let's see what happens */ if (*line == '\0') return 0; /* carve off the first token, determine if it is a label */ x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st); if (x == NULL || *x == '\0') return 0; if (qt) { /* labels could contain an unmatched quote character, I guess? */ qt = NULL; } /* we have something, try to make sense of what it is */ #ifdef NON_SPEC_LABELS /* I want my labels like 'label:' */ if ( *(x + strlen(line) - 1) == ':' ) { *(x + strlen(line) - 1) = '\0'; DEBUG_PRINTF("label: %s\n", x); label = x; opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); } else { label = NULL; opcode = x; } #endif /* NON_SPEC_LABELS */ /* spec gives example of labels as ':label' */ if (*x == ':') { *x = '\0'; x++; label = x; opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); } else { label = NULL; opcode = x; } /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */ if (opcode && *opcode) { /* if we have an opcode, we'll need at least one word to compile instruction */ instr_words_needed++; /* build a list of operands to hang off this instruction */ while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) { struct operand_ *new_operand; char *y; /* trim whitespaces */ x += strspn(x, whitespace); if (*x) { for (y = x + strlen(x) - 1; *y; y--) { if (strchr(whitespace, *y)) { *y = '\0'; } } } /* nothing left? */ if (*x == '\0') { fprintf(stderr, "null operand encountered\n"); return -1; } DEBUG_PRINTF("tokenized operand '%s'\n", x); new_operand = malloc(sizeof *new_operand); if (new_operand == NULL) { fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } new_operand->operand = strdup(x); if (new_operand->operand == NULL) { fprintf(stderr, "%s():%s\n", "strdup", strerror(errno)); return -1; } new_operand->next = NULL; if (strchr(quotes, x[0])) { /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */ instr_words_needed += strlen(x) - 1; } instr_words_needed++; *operand_tail = new_operand; operand_tail = &(*operand_tail)->next; } } DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed); instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr); if (instr == NULL) { fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } if (label) { instr->label = strdup(label); if (instr->label == NULL) { fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } } else { label = NULL; } if (opcode) { instr->opcode = strdup(opcode); if (instr->opcode == NULL) { fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } } else { opcode = NULL; } instr->operands = operand_list; *next_instr = instr; return 0; } /* try to generate bytecode for an instruction */ /* returns -1 on unrecoverable error */ static int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) { unsigned int nwu = 0; /* number of words used */ unsigned int incomplete = 0; int bits; struct operand_ *o = i->operands; if (opt_.verbose > 2) { printf("%s: assembling %p ", __func__, (void *)i); instruction_print_(i, 1); printf("(line %zu)\n", i->src_line); } if (i->opcode == NULL) { assert(i->label); assert(i->operands == NULL); /* just a label, move along */ i->length = 0; i->ready = 1; return 0; } /* special case DAT */ if (strncasecmp(i->opcode, "DAT", 3) == 0) { DEBUG_PRINTF("processing DAT...\n"); i->length = 0; for ( /* */ ; o; o = o->next) { size_t j, dat_len; char *x; unsigned long l; DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, (void *)o->next); /* is this a string? */ /* does it start with a quote, and end with the same quote? */ if ( (x = strchr("\"'`", o->operand[0])) ) { dat_len = strlen(o->operand) - 1; if (o->operand[dat_len] == *x) { /* it is a string */ DEBUG_PRINTF("DAT string operand: %s\n", o->operand); for (j = 0, x = o->operand + 1; j < dat_len - 1; j++, x++) { i->instr_words[i->length] = *x; i->length++; } /* Note that strings in DAT do not include their zero-terminators */ /* specify as 'DAT "string", 0' */ } continue; } /* is this a number? */ char *ep; errno = 0; l = strtoul(o->operand, &ep, 0); if (errno == 0 && (*o->operand && (*ep == '\0')) ) { /* conversion succeeded */ if (l > 0xffff) { fprintf(stderr, "value '%lu' out of range\n", l); return -1; } i->instr_words[i->length] = l; i->length++; continue; } /* otherwise assume it's a label, even if we don't know what it is */ if (label_addr_(labels, o->operand, &i->instr_words[i->length])) { DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand); incomplete = 1; } i->length++; } if (incomplete) { DEBUG_PRINTF("pending label address\n"); } else { i->ready = 1; } return 0; } /* end of DAT */ /* start with opcode bits */ bits = opcode_bits_(i->opcode); if (bits < 0) { fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : ""); for (o = i->operands; o; o = o->next) fprintf(stderr, " %s%s", o->operand, o->next ? "," : ""); fprintf(stderr, "'\n"); return -1; } i->instr_words[0] |= bits & N_BIT_MASK(OPCODE_BITS); /* in rendered bytecode, all instructions have a and b operands; nbi instructions occupy 'b operand' bits. */ if ((bits & N_BIT_MASK(OPCODE_BITS)) == 0) { bits = nbi_opcode_bits_(i->opcode); if (bits < 0) { fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n"); exit(EX_SOFTWARE); } } else { if (o == NULL) { fprintf(stderr, "'%s' requires more operands\n", i->opcode); return -1; } bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels); if (bits == -1) { fprintf(stderr, "couldn't assemble instruction\n"); return -1; } else if (bits == -2) { DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); /* keep going, but don't finalize until we can calculate label address */ incomplete = 1; bits = 0; } o = o->next; } if (bits > N_BIT_MASK(OPERAND_B_BITS)) { fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "b"); return -1; } i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_B_BITS)) << OPCODE_BITS; if (o == NULL) { fprintf(stderr, "'%s' requires more operands\n", i->opcode); return -1; } bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels); if (bits == -1) { fprintf(stderr, "couldn't assemble instruction\n"); return -1; } else if (bits == -2) { DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); /* keep going, but don't finalize until we can calculate label address */ incomplete = 1; bits = 0; } o = o->next; if (bits > N_BIT_MASK(OPERAND_A_BITS)) { fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "a"); } i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_A_BITS)) << (OPCODE_BITS + OPERAND_B_BITS); if (o != NULL) { fprintf(stderr, "too many operands\n"); return -1; } /* counting labels as words, we now know at least the maximum instruction length */ i->length = nwu + 1; DEBUG_PRINTF("instruction words: [%u]", i->length); for (bits = 0; bits <= (int)nwu; bits++) DEBUG_PRINTFQ(" %04x", i->instr_words[bits]); if (incomplete) { DEBUG_PRINTFQ(" (preliminary)"); } else { i->ready = 1; } DEBUG_PRINTFQ("\n"); return 0; } /* parse_stream_ * read lines from stream f * break each line into parts, populate parts into structures */ static int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) { struct instruction_ *instr, **instr_list_entry; unsigned int line = 0; int retval = 0; char buf[0x4000]; buf[sizeof buf - 1] = '\0'; while (fgets(buf, sizeof buf, f)) { line++; if (buf[sizeof buf - 1] != '\0') { fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n"); retval = -1; break; } if (tokenize_line_(buf, &instr)) { fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n"); retval = -1; break; } if (instr) { instr->src_line = line; /* add to list of instructions */ instr_list_entry = dynarray_add(instructionps, &instr); if (instr_list_entry == NULL) { fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n"); break; } if (instr->label) { struct label_ new_label = { .label = instr->label, .instr = instr_list_entry, .ready = 0, .addr = 0, }; if (label_find_(labels, instr->label)) { fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n"); break; } if (dynarray_add(labels, &new_label) == NULL) { fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n"); break; } label_addr_calculate_(instructionps, labels); } if (instr_assemble_(labels, instr, allow_short_labels)) { fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n"); break; } } } if (ferror(f)) { fprintf(stderr, "%s():%s\n", "fgets", strerror(errno)); return -1; } if (! feof(f)) { fprintf(stderr, "parsing aborted\n"); return -1; } return retval; } /* assemble_check_ * make a full pass over instruction list to resolve labels */ static int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) { int retval = 0; size_t x; /* fixing short labels .... */ /* by here we have our list of instructions and their maximum instruction lengths */ /* and we have a list of addresses, based on those maximum lengths */ /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */ /* and reassemble.. */ /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */ /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */ /* try this? keep another list of locations a label address is used */ /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */ DEBUG_PRINTF(" final pass of assembler...\n"); for (x = 0; x < instructionps->entries; x++) { struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x); retval = instr_assemble_(labels, *instrp, allow_short_labels); if (retval) { fprintf(stderr, "instruction %zu failed to assemble\n", x); return retval; } if (! (*instrp)->ready) { fprintf(stderr, "instruction not resolvable at line %lu\n", (*instrp)->src_line); return -1; } } VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_"); for (x = 0; x < labels->entries; x++) { struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x); if (! l->ready) retval |= -1; if (opt_.verbose) { printf("%3s0x%04x %-32s ", l->ready ? "" : "*", l->addr, l->label); instruction_print_(*(l->instr), 0); printf("\n"); } } VERBOSE_PRINTF("\n"); if (retval) fprintf(stderr, "some labels could not be resolved\n"); return retval; } /* output_ * write assembled words to named file */ static int output_(struct dynamic_array *instructionps, const char *filename) { FILE *of = NULL; struct instruction_ **instrp; size_t i, r, total_words = 0; size_t x; if (! opt_.dryrun) { of = fopen(filename, "w"); if (of == NULL) { fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno)); return -1; } } for (i = 0; i < instructionps->entries; i++) { instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i); if (opt_.verbose) { int s; s = instruction_print_(*instrp, 1); printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, ""); for (x = 0; x < (*instrp)->length; x++) { printf(" %04x", (*instrp)->instr_words[x]); } printf("\n"); } if (of) { r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of); if (r < (*instrp)->length) { fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno)); return -1; } } total_words += (*instrp)->length; } fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n", opt_.dryrun ? "assembled" : "wrote", i, total_words); return 0; } static struct dynamic_array *instructionps_; static struct dynamic_array *labels_; int main(int argc, char *argv[]) { const char *out_filename = NULL; unsigned int allow_short_labels = 0; int c; while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) { switch (c) { case 'd': opt_.dryrun++; break; case 's': allow_short_labels++; break; case 'o': if (out_filename) { fprintf(stderr, "Sorry, I can only write one file at a time.\n"); exit(EX_CANTCREAT); } out_filename = optarg; break; case 'v': opt_.verbose++; break; case 'h': usage_(argv[0], 1); exit(EX_OK); default: usage_(argv[0], 0); exit(EX_USAGE); } } argc -= optind; argv += optind; if (out_filename == NULL) out_filename = out_filename_default_; /* init tables */ instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024); labels_ = dynarray_new(sizeof(struct label_), 256); if (instructionps_ == NULL || labels_ == NULL) { fprintf(stderr, "failed to initialize\n"); exit(EX_OSERR); } /* if filenames were specified, parse them instead of stdin */ if (argc) { while (argc) { char *filename = *argv; FILE *f = fopen(filename, "r"); argc--, argv++; if (f == NULL) { fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno)); continue; } VERBOSE_PRINTF("assembling '%s'...\n", filename); c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); fclose(f); if (c) break; } } else { VERBOSE_PRINTF("assembling '%s'...\n", "stdin"); c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); } if (c) { fprintf(stderr, "could not parse input, aborting\n"); exit(EX_DATAERR); } if (assemble_check_(instructionps_, labels_, allow_short_labels)) { fprintf(stderr, "errors prevented assembly\n"); exit(EX_DATAERR); } if (output_(instructionps_, out_filename)) { fprintf(stderr, "failed to create output\n"); exit(EX_OSERR); } exit(EX_OK); }