From: Justin Wind Date: Wed, 11 Apr 2012 01:06:24 +0000 (-0700) Subject: redid value_bits_ and assorted cleanup X-Git-Url: https://git.squeep.com/?a=commitdiff_plain;h=f557fe098d360693d5ed460adbd247b327c3cc58;p=dcpu16 redid value_bits_ and assorted cleanup value_bits_ can now handle labels in odd places --- diff --git a/as-dcpu16.c b/as-dcpu16.c index 38d77ce..c89d103 100644 --- a/as-dcpu16.c +++ b/as-dcpu16.c @@ -11,6 +11,8 @@ /* * quick and dirty assembler for dcpu16 * + * TODO + * needs ability to specify location for code or data */ static const char * const src_id_ = "$Id$"; @@ -20,8 +22,8 @@ unsigned int verbose_ = 0; unsigned int dryrun_ = 0; -#define DEBUG_NOTIFY(...) do { if (verbose_ > 2) fprintf(stderr, __VA_ARGS__); } while (0) -#define VERBOSE_NOTIFY(...) do { if (verbose_) printf(__VA_ARGS__); } while (0) +#define DEBUG_PRINTF(...) do { if (verbose_ > 2) printf(__VA_ARGS__); } while (0) +#define VERBOSE_PRINTF(...) do { if (verbose_) printf(__VA_ARGS__); } while (0) static @@ -118,7 +120,7 @@ int label_list_insert(struct label_list_ **ll, struct instruction_ **instr) { (*ll)->allocated = new_allocated; } - DEBUG_NOTIFY("TRACE: adding label '%s'\n", (*instr)->label); + DEBUG_PRINTF("TRACE: adding label '%s'\n", (*instr)->label); (*ll)->label[(*ll)->entries].label = (*instr)->label; (*ll)->label[(*ll)->entries].instr = instr; @@ -192,7 +194,7 @@ void label_addr_calculate_(struct instruction_list_ *il, struct label_list_ *ll) } ll->label[i].addr = word_count; ll->label[i].ready = 1; - DEBUG_NOTIFY("label '%s' has addr of 0x%04x\n", ll->label[i].label, word_count); + DEBUG_PRINTF("label '%s' has addr of 0x%04x\n", ll->label[i].label, word_count); } } @@ -290,151 +292,245 @@ unsigned int register_enumerate_(char r) { return -1; } -/* generate the six bits for a given operand */ -/* FIXME: MAEK BETTR */ -/* notes: nextword may be rewritten even if it's not used in final instruction */ +/* removes all occurences of chars from buf */ +static inline +void buf_strip_chars_(char *buf, char *chars) { + char *s, *d; + + for (s = d = buf; *s; s++, d++) { + while (*s && strchr(chars, *s)) { + s++; + } + if (!*s) + break; + *d = *s; + } + *d = *s; +} + +/* value_bits_ + * generate the six bits for a given operand string + * returns -1 if it could not parse the operand + * returns -2 if it could not parse the operand due to an unresolved label + * notes: nextword may be overwritten even if it's not used in final instruction + */ static int value_bits_(struct label_list_ *ll, char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) { - unsigned int retval = -1; + static char *operand = NULL; + static size_t operand_sz = 0; + unsigned long l; - char *operand, *o, *ep; + char *o, *ep; + + /* + Our operand working buffer shouldn't ever need to be too big, + but DAT might blow that assumption. + */ + if (operand_sz <= strlen(operand_orig)) { + void *tmp_ptr; + size_t new_sz = strlen(operand_orig); + + if (new_sz < 256) + new_sz = 256; + new_sz += 256; + + DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz); + tmp_ptr = realloc(operand, new_sz); + if (tmp_ptr == NULL) { + fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno)); + return -1; + } + operand = tmp_ptr; + operand_sz = new_sz; + } - operand = o = strdup(operand_orig); + o = strcpy(operand, operand_orig); - DEBUG_NOTIFY("TRACE: operand '%s' is ", operand); + DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */ - if (strlen(operand) == 1) { - if ( (strchr("ABCXYZIJ", *operand)) - || (strchr("abcxyzij", *operand)) ) { - DEBUG_NOTIFY("register\n"); - retval = register_enumerate_(*operand); - goto done; - } + /* this is a very stupid parser */ + + /* first, let's trim all whitespace out of string at once to make parsing easier */ + buf_strip_chars_(operand, " \t\n"); + + /* single character might match a register */ + if (strlen(operand) == 1 + && strchr("AaBbCcXxYyZzIiJj", *operand)) { + DEBUG_PRINTF("is register %c\n", *operand); + return register_enumerate_(*operand); + } + + /* easy matches */ + if (strcasecmp(operand, "POP") == 0) { + DEBUG_PRINTF("is POP\n"); + return 0x18; + } + if (strcasecmp(operand, "PUSH") == 0) { + DEBUG_PRINTF("is PUSH\n"); + return 0x19; + } + if (strcasecmp(operand, "PEEK") == 0) { + DEBUG_PRINTF("is PEEK\n"); + return 0x1a; + } + if (strcasecmp(operand, "SP") == 0) { + DEBUG_PRINTF("is register SP\n"); + return 0x1b; + } + if (strcasecmp(operand, "PC") == 0) { + DEBUG_PRINTF("is register PC\n"); + return 0x1c; + } + if (strcasecmp(operand, "O") == 0) { + DEBUG_PRINTF("is register O\n"); + return 0x1d; } + /* is the operand [bracketed]? */ if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') { + /* eat the brackets */ operand[strlen(operand) - 1] = '\0'; operand++; - /* trim whitespaces */ - while (strchr(" \t\n", *operand)) operand++; - ep = operand + strlen(operand) - 1; - - if (strlen(operand) == 1) { - DEBUG_NOTIFY("dereferenced register\n"); - retval = 0x08 | register_enumerate_(*operand); - goto done; + + /* is it [register]? */ + if (strlen(operand) == 1 + && strchr("AaBbCcXxYyZzIiJj", *operand)) { + DEBUG_PRINTF("is dereferenced register %c\n", *operand); + return 0x08 | register_enumerate_(*operand); } + /* is it [register+something]? */ if ( (ep = strchr(operand, '+')) ) { - char reg; + char *reg; char *constant; - while (strchr("+ \t\n", *ep)) { - *ep = '\0'; - ep++; - } - if (strlen(ep) == 1) { - reg = *ep; + /* eat the plus */ + *ep = '\0'; + ep++; + + /* figure out which one is which */ + if (strlen(ep) == 1 + && strchr("AaBbCcXxYyZzIiJj", *ep)) { + reg = ep; constant = operand; - } else if (strlen(operand) == 1) { - reg = *operand; + } else if (strlen(operand) == 1 + && strchr("AaBbCcXxYyZzIiJj", *operand) ) { + reg = operand; constant = ep; } else { - fprintf(stderr, "couldn't parse operand\n"); - goto done; + DEBUG_PRINTF("is unparsable\n"); + fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig); + return -1; } - if ( strchr("ABCXYZIJ", reg) - || strchr("abcxyzij", reg) ) { - l = strtoul(constant, &ep, 0); + /* check if something is understandable as a value */ + errno = 0; + l = strtoul(constant, &ep, 0); + if (errno == 0 + && (*constant && (*ep == '\0')) ) { + /* string conversion went without issue */ + /* validate it will fit in a word */ + if (l > 0xffff) { + DEBUG_PRINTF("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + + /* seems fine */ + *nextword = l & 0xffff; + *nextwordused += 1; + DEBUG_PRINTF("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword); + return 0x10 | register_enumerate_(*reg); + } else if (errno) { + DEBUG_PRINTF("is out of range\n"); + fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno)); + return -1; } - DEBUG_NOTIFY("dereferenced register+constant\n"); + /* what? still here? assume it's a label, I guess */ + /* try to populate nextword with label address */ + if (label_list_find_addr(ll, operand, nextword)) { + DEBUG_PRINTF("(deferred label resolution)\n"); + *nextwordused += 1; + return -2; + } + DEBUG_PRINTF("is a dereferenced register (%c) + label\n", *reg); + *nextwordused += 1; + return 0x10 | register_enumerate_(*reg); + } - DEBUG_NOTIFY("\tregister_index:%u %c\n", reg, register_enumerate_(reg)); - DEBUG_NOTIFY("\tconstant:%lu\n", l); + /* it must just be a dereferenced literal then */ + errno = 0; + l = strtoul(operand, &ep, 0); + if (errno == 0 + && (*operand && (*ep == '\0')) ) { + /* string conversion went without issue */ + /* validate it will fit in a word */ + if (l > 0xffff) { + DEBUG_PRINTF("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } + + DEBUG_PRINTF("is a dereferenced literal value (%hu)\n", *nextword); *nextword = l & 0xffff; *nextwordused += 1; - retval = 0x10 | register_enumerate_(reg); - goto done; + return 0x1e; + } else if (errno) { + DEBUG_PRINTF("is out of range\n"); + fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno)); } - l = strtoul(operand, &ep, 0); - DEBUG_NOTIFY("dereferenced literal value %lu...\n", l); - *nextword = l & 0xffff; + /* not a number? try a label */ + if (label_list_find_addr(ll, operand, nextword)) { + DEBUG_PRINTF("(deferred label resolution)\n"); + *nextwordused += 1; + return -2; + } + DEBUG_PRINTF("is a dereferenced label\n"); *nextwordused += 1; - retval = 0x1e; - goto done; + return 0x1e; } - if (strcasecmp(operand, "POP") == 0) { - DEBUG_NOTIFY("POP\n"); - retval = 0x18; - goto done; - } - if (strcasecmp(operand, "PUSH") == 0) { - DEBUG_NOTIFY("PUSH\n"); - retval = 0x19; - goto done; - } - if (strcasecmp(operand, "PEEK") == 0) { - DEBUG_NOTIFY("PEEK\n"); - retval = 0x1a; - goto done; - } - if (strcasecmp(operand, "SP") == 0) { - DEBUG_NOTIFY("sp register\n"); - retval = 0x1b; - goto done; - } - if (strcasecmp(operand, "PC") == 0) { - DEBUG_NOTIFY("pc register\n"); - retval = 0x1c; - goto done; - } - if (strcasecmp(operand, "O") == 0) { - DEBUG_NOTIFY("o register\n"); - retval = 0x1d; - goto done; - } + /* left with a literal or a label, then */ + errno = 0; l = strtoul(operand, &ep, 0); + if (errno == 0 + || (*operand && (*ep == '\0')) ) { + if (l > 0xffff) { + DEBUG_PRINTF("is out of range\n"); + fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig); + return -1; + } - if (operand && *ep == '\0') { - DEBUG_NOTIFY("literal value %lu...\n", l); + DEBUG_PRINTF("is literal value (%lu)\n", l); if (l < 0x20) { - retval = l + 0x20; - goto done; - } else { - *nextword = l & 0xffff; - *nextwordused += 1; - retval = 0x1f; - goto done; + return l + 0x20; } + + *nextword = l & 0xffff; + *nextwordused += 1; + return 0x1f; } /* try to populate nextword with label address */ if (label_list_find_addr(ll, operand, nextword)) { - DEBUG_NOTIFY("currently-unknown label...\n"); + DEBUG_PRINTF("(deferred label resolution)\n"); /* assume non-small literal value */ *nextwordused += 1; - goto done; + return -2; } - DEBUG_NOTIFY("label '%s' 0x%02hx\n", operand, *nextword); + DEBUG_PRINTF("is label '%s' (0x%02hx)\n", operand, *nextword); if (*nextword < 0x20 && allow_short_labels) { - DEBUG_NOTIFY("small value label win\n"); - retval = (0x20 + *nextword) & 0x3f; - goto done; + DEBUG_PRINTF("small value label win\n"); + return (0x20 + *nextword) & 0x3f; } - retval = 0x1f; *nextwordused += 1; - -done: - free(o); - return retval; + return 0x1f; } static inline @@ -568,17 +664,21 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { /* try to generate bytecode for an instruction */ static -void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned int allow_short_labels) { +int instr_assemble_(struct label_list_ *ll, struct instruction_ *i, unsigned int allow_short_labels) { unsigned int nwu = 0; /* number of words used */ unsigned int incomplete = 0; int bits; struct operand_ *o = i->operands; - DEBUG_NOTIFY("TRACE: codifying %s%s'%s'...", i->label ? i->label : "", i->label ? ":" : "", i->opcode); + if (verbose_ > 2) { + printf("%s: assembling ", __func__); + instruction_print_(i,1); + printf("\n"); + } if (i->ready) { - /* already codified */ - return; + /* already assembled, nothing to do */ + return 0; } /* special case DAT */ @@ -588,14 +688,17 @@ void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned /* count total length of data.. */ /* realloc instruction */ /* populate words */ - return; + return 0; } /* start with opcode bits */ bits = opcode_bits_(i->opcode); if (bits < 0) { - fprintf(stderr, "unrecognized instruction '%s'\n", i->opcode); - return; + fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : ""); + for (o = i->operands; o; o = o->next) + fprintf(stderr, " %s%s", o->operand, o->next ? "," : ""); + fprintf(stderr, "'\n"); + return -1; } i->instr_words[0] |= 0x0f & bits; @@ -603,17 +706,20 @@ void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned if ((bits & 0x0f) == 0) { bits = nbi_opcode_bits_(i->opcode); if (bits < 0) { - fprintf(stderr, "internal error: missing instruction in nbi opcode table\n"); - return; + fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n"); + exit(EX_SOFTWARE); } } else { if (o == NULL) { fprintf(stderr, "'%s' requires more operands\n", i->opcode); - return; + return -1; } bits = value_bits_(ll, o->operand, i->instr_words + 1, &nwu, allow_short_labels); - if (bits < 0) { - DEBUG_NOTIFY("TRACE: unresolved label\n"); + if (bits == -1) { + fprintf(stderr, "couldn't assemble instruction\n"); + return -1; + } else if (bits == -2) { + DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); /* keep going, but don't finalize until we can calculate label address */ incomplete = 1; bits = 0; @@ -624,12 +730,15 @@ void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned if (o == NULL) { fprintf(stderr, "'%s' requires more operands\n", i->opcode); - return; + return -1; } bits = value_bits_(ll, o->operand, i->instr_words + nwu + 1, &nwu, allow_short_labels); - if (bits < 0) { - DEBUG_NOTIFY("TRACE: unresolved label\n"); + if (bits == -1) { + fprintf(stderr, "couldn't assemble instruction\n"); + return -1; + } else if (bits == -2) { + DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__); /* keep going, but don't finalize until we can calculate label address */ incomplete = 1; bits = 0; @@ -637,62 +746,77 @@ void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned o = o->next; i->instr_words[0] |= (bits & 0x3f) << 10; - /* counting labels as words, we now know the maximum instruction length */ - /* if label is < 0x20, it can take up less space */ + if (o != NULL) { + fprintf(stderr, "too many operands\n"); + return -1; + } + + /* counting labels as words, we now know at least the maximum instruction length */ i->length = nwu + 1; - DEBUG_NOTIFY("instruction words: [%u]", i->length); + DEBUG_PRINTF("instruction words: [%u]", i->length); for (bits = 0; bits <= (int)nwu; bits++) - DEBUG_NOTIFY(" 0x%04x", i->instr_words[bits]); + DEBUG_PRINTF(" %04x", i->instr_words[bits]); if (incomplete) { - DEBUG_NOTIFY(" (preliminary)"); + DEBUG_PRINTF(" (preliminary)"); } else { i->ready = 1; } - DEBUG_NOTIFY("\n"); + DEBUG_PRINTF("\n"); + + return 0; } -/* thish should grow buffer to fit huge linesh, but I jusht don't care right now, hic */ +/* parse_stream_ + * read lines from stream f + * break each line into parts, populate parts into structures + */ static -int parse_stream_(FILE *f, struct instruction_list_ **il, struct label_list_ **ll, unsigned int allow_short_labels) { +int parse_stream_(FILE *f, const char *src, struct instruction_list_ **il, struct label_list_ **ll, unsigned int allow_short_labels) { struct instruction_ *instr, **instr_list_entry; - char buf[(1<<14)]; + unsigned int line = 0; + int retval = 0; + char buf[0x4000]; buf[sizeof buf - 1] = '\0'; while (fgets(buf, sizeof buf, f)) { + line++; + if (buf[sizeof buf - 1] != '\0') { - fprintf(stderr, "input buffer exhausted\n"); + fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n"); + retval = -1; break; } if (buf_tokenize_(buf, &instr)) { - fprintf(stderr, "trouble tokenizing input\n"); + fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n"); + retval = -1; break; } if (instr) { /* add to list of instructions */ if (instr_list_insert(il, instr)) { - fprintf(stderr, "could not populate instruction list\n"); + fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n"); } instr_list_entry = (*il)->instr + (*il)->entries - 1; - DEBUG_NOTIFY("TRACE: verify %s == %s\n", (*instr_list_entry)->opcode, instr->opcode); + if (instr->label) { if (label_list_find_instr(*ll, instr->label)) { - fprintf(stderr, "duplicate label\n"); + fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n"); break; } if (label_list_insert(ll, instr_list_entry)) { - fprintf(stderr, "could not populate label list\n"); + fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n"); } label_addr_calculate_(*il, *ll); } - instr_bytecodify_(*ll, instr, allow_short_labels); + instr_assemble_(*ll, instr, allow_short_labels); } } if (ferror(f)) { @@ -704,34 +828,40 @@ int parse_stream_(FILE *f, struct instruction_list_ **il, struct label_list_ **l return -1; } - return 0; + return retval; } +/* assemble_check_ + * make a full pass over instruction list to resolve labels + */ static int assemble_check_(struct instruction_list_ *il, struct label_list_ *ll, unsigned int allow_short_labels) { int retval = 0; size_t x; - DEBUG_NOTIFY(" final pass of codifier...\n"); + DEBUG_PRINTF(" final pass of assembler...\n"); for (x = 0; x < il->entries; x++) { - instr_bytecodify_(ll, il->instr[x], allow_short_labels); + retval |= instr_assemble_(ll, il->instr[x], allow_short_labels); + if (retval) { + fprintf(stderr, "instruction failed to assemble\n"); + } } - VERBOSE_NOTIFY("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_"); + VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_"); for (x = 0; x < ll->entries; x++) { if (! ll->label[x].ready) retval |= -1; - VERBOSE_NOTIFY("%3s0x%04x %-32s ", - ll->label[x].ready ? "" : "*", - ll->label[x].addr, - ll->label[x].label); if (verbose_) { + printf("%3s0x%04x %-32s ", + ll->label[x].ready ? "" : "*", + ll->label[x].addr, + ll->label[x].label); instruction_print_(*(ll->label[x].instr), 0); printf("\n"); } } - VERBOSE_NOTIFY("\n"); + VERBOSE_PRINTF("\n"); if (retval) fprintf(stderr, "some labels could not be resolved\n"); @@ -777,9 +907,11 @@ int output_(struct instruction_list_ *il, const char *filename) { total_words += instr->length; } - fprintf(stderr, "wrote 0x%04zx instructions as 0x%04zx words\n", + fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n", + dryrun_ ? "assembled" : "wrote", i, total_words); + return 0; } @@ -846,14 +978,14 @@ int main(int argc, char *argv[]) { continue; } - VERBOSE_NOTIFY("assembling '%s'...\n", filename); - parse_stream_(f, &il_, &ll_, allow_short_labels); + VERBOSE_PRINTF("assembling '%s'...\n", filename); + parse_stream_(f, filename, &il_, &ll_, allow_short_labels); fclose(f); } } else { - VERBOSE_NOTIFY("assembling '%s'...\n", "stdin"); - parse_stream_(stdin, &il_, &ll_, allow_short_labels); + VERBOSE_PRINTF("assembling '%s'...\n", "stdin"); + parse_stream_(stdin, "-", &il_, &ll_, allow_short_labels); } if (assemble_check_(il_, ll_, allow_short_labels)) {