/*
* quick and dirty assembler for dcpu16
*
+ * TODO
+ * needs ability to specify location for code or data
*/
static const char * const src_id_ = "$Id$";
unsigned int dryrun_ = 0;
-#define DEBUG_NOTIFY(...) do { if (verbose_ > 2) fprintf(stderr, __VA_ARGS__); } while (0)
-#define VERBOSE_NOTIFY(...) do { if (verbose_) printf(__VA_ARGS__); } while (0)
+#define DEBUG_PRINTF(...) do { if (verbose_ > 2) printf(__VA_ARGS__); } while (0)
+#define VERBOSE_PRINTF(...) do { if (verbose_) printf(__VA_ARGS__); } while (0)
static
(*ll)->allocated = new_allocated;
}
- DEBUG_NOTIFY("TRACE: adding label '%s'\n", (*instr)->label);
+ DEBUG_PRINTF("TRACE: adding label '%s'\n", (*instr)->label);
(*ll)->label[(*ll)->entries].label = (*instr)->label;
(*ll)->label[(*ll)->entries].instr = instr;
}
ll->label[i].addr = word_count;
ll->label[i].ready = 1;
- DEBUG_NOTIFY("label '%s' has addr of 0x%04x\n", ll->label[i].label, word_count);
+ DEBUG_PRINTF("label '%s' has addr of 0x%04x\n", ll->label[i].label, word_count);
}
}
return -1;
}
-/* generate the six bits for a given operand */
-/* FIXME: MAEK BETTR */
-/* notes: nextword may be rewritten even if it's not used in final instruction */
+/* buf_strip_chars_
+ * removes all occurrences of chars from buf, in place
+ * buf: NUL-terminated string to compact; it can only get shorter
+ * chars: NUL-terminated set of characters to delete (read only)
+ */
+static inline
+void buf_strip_chars_(char *buf, const char *chars) {
+ char *s, *d;
+
+ /* s visits every input character, d trails behind as the write position */
+ for (s = d = buf; *s; s++) {
+ /* *s is never NUL here, so strchr cannot match the set's own terminator */
+ if (strchr(chars, *s) == NULL)
+ *d++ = *s;
+ }
+ *d = '\0';
+}
+
+/* value_bits_
+ * generate the six bits for a given operand string
+ * ll: label list consulted when the operand is not a register, keyword or number
+ * operand_orig: operand text; it is copied to a scratch buffer and never modified
+ * nextword: receives the literal or label address when one is parsed
+ * nextwordused: incremented whenever the encoding needs (or, for a deferred label, may need) an extra word
+ * allow_short_labels: if set, a resolved label below 0x20 is encoded without an extra word
+ * returns -1 if it could not parse the operand
+ * returns -2 if it could not parse the operand due to an unresolved label
+ * notes: nextword may be overwritten even if it's not used in final instruction
+ * notes: the scratch buffer is static and reused between calls, so this is not reentrant
+ */
static
int value_bits_(struct label_list_ *ll, char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
- unsigned int retval = -1;
+ /* the allocation itself; never advanced, so it stays valid for realloc */
+ static char *operand_buf = NULL;
+ static size_t operand_sz = 0;
+
unsigned long l;
- char *operand, *o, *ep;
+ /* operand is a cursor into operand_buf which the parser is free to move */
+ char *operand, *ep;
+
+ /*
+ Our operand working buffer shouldn't ever need to be too big,
+ but DAT might blow that assumption.
+ */
+ if (operand_sz <= strlen(operand_orig)) {
+ void *tmp_ptr;
+ size_t new_sz = strlen(operand_orig);
+
+ if (new_sz < 256)
+ new_sz = 256;
+ new_sz += 256;
+
+ DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
+ tmp_ptr = realloc(operand_buf, new_sz);
+ if (tmp_ptr == NULL) {
+ fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
+ return -1;
+ }
+ operand_buf = tmp_ptr;
+ operand_sz = new_sz;
+ }
- operand = o = strdup(operand_orig);
+ operand = strcpy(operand_buf, operand_orig);
- DEBUG_NOTIFY("TRACE: operand '%s' is ", operand);
+ DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
- if (strlen(operand) == 1) {
- if ( (strchr("ABCXYZIJ", *operand))
- || (strchr("abcxyzij", *operand)) ) {
- DEBUG_NOTIFY("register\n");
- retval = register_enumerate_(*operand);
- goto done;
- }
+ /* this is a very stupid parser */
+
+ /* first, let's trim all whitespace out of string at once to make parsing easier */
+ buf_strip_chars_(operand, " \t\n");
+
+ /* single character might match a register */
+ if (strlen(operand) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *operand)) {
+ DEBUG_PRINTF("is register %c\n", *operand);
+ return register_enumerate_(*operand);
+ }
+
+ /* easy matches */
+ if (strcasecmp(operand, "POP") == 0) {
+ DEBUG_PRINTF("is POP\n");
+ return 0x18;
+ }
+ if (strcasecmp(operand, "PUSH") == 0) {
+ DEBUG_PRINTF("is PUSH\n");
+ return 0x19;
+ }
+ if (strcasecmp(operand, "PEEK") == 0) {
+ DEBUG_PRINTF("is PEEK\n");
+ return 0x1a;
+ }
+ if (strcasecmp(operand, "SP") == 0) {
+ DEBUG_PRINTF("is register SP\n");
+ return 0x1b;
+ }
+ if (strcasecmp(operand, "PC") == 0) {
+ DEBUG_PRINTF("is register PC\n");
+ return 0x1c;
+ }
+ if (strcasecmp(operand, "O") == 0) {
+ DEBUG_PRINTF("is register O\n");
+ return 0x1d;
}
+ /* is the operand [bracketed]? */
if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
+ /* eat the brackets; only the local cursor moves, operand_buf keeps the allocation */
operand[strlen(operand) - 1] = '\0';
operand++;
- /* trim whitespaces */
- while (strchr(" \t\n", *operand)) operand++;
- ep = operand + strlen(operand) - 1;
-
- if (strlen(operand) == 1) {
- DEBUG_NOTIFY("dereferenced register\n");
- retval = 0x08 | register_enumerate_(*operand);
- goto done;
+
+ /* is it [register]? */
+ if (strlen(operand) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *operand)) {
+ DEBUG_PRINTF("is dereferenced register %c\n", *operand);
+ return 0x08 | register_enumerate_(*operand);
}
+ /* is it [register+something]? */
if ( (ep = strchr(operand, '+')) ) {
- char reg;
+ char *reg;
char *constant;
- while (strchr("+ \t\n", *ep)) {
- *ep = '\0';
- ep++;
- }
- if (strlen(ep) == 1) {
- reg = *ep;
+ /* eat the plus */
+ *ep = '\0';
+ ep++;
+
+ /* figure out which one is which */
+ if (strlen(ep) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *ep)) {
+ reg = ep;
constant = operand;
- } else if (strlen(operand) == 1) {
- reg = *operand;
+ } else if (strlen(operand) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
+ reg = operand;
constant = ep;
} else {
- fprintf(stderr, "couldn't parse operand\n");
- goto done;
+ DEBUG_PRINTF("is unparsable\n");
+ fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+ return -1;
}
- if ( strchr("ABCXYZIJ", reg)
- || strchr("abcxyzij", reg) ) {
- l = strtoul(constant, &ep, 0);
+ /* check if something is understandable as a value */
+ errno = 0;
+ l = strtoul(constant, &ep, 0);
+ if (errno == 0
+ && (*constant && (*ep == '\0')) ) {
+ /* string conversion went without issue */
+ /* validate it will fit in a word */
+ if (l > 0xffff) {
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ /* seems fine */
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+ DEBUG_PRINTF("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
+ return 0x10 | register_enumerate_(*reg);
+ } else if (errno == ERANGE) {
+ /* only overflow is a numeric error; strtoul may set EINVAL for a label */
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(ERANGE));
+ return -1;
}
- DEBUG_NOTIFY("dereferenced register+constant\n");
+ /* what? still here? assume it's a label, I guess */
+ /* try to populate nextword with label address */
+ /* look up the non-register piece; operand may be the register itself */
+ if (label_list_find_addr(ll, constant, nextword)) {
+ DEBUG_PRINTF("(deferred label resolution)\n");
+ *nextwordused += 1;
+ return -2;
+ }
+ DEBUG_PRINTF("is a dereferenced register (%c) + label\n", *reg);
+ *nextwordused += 1;
+ return 0x10 | register_enumerate_(*reg);
+ }
- DEBUG_NOTIFY("\tregister_index:%u %c\n", reg, register_enumerate_(reg));
- DEBUG_NOTIFY("\tconstant:%lu\n", l);
+ /* it must just be a dereferenced literal then */
+ errno = 0;
+ l = strtoul(operand, &ep, 0);
+ if (errno == 0
+ && (*operand && (*ep == '\0')) ) {
+ /* string conversion went without issue */
+ /* validate it will fit in a word */
+ if (l > 0xffff) {
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ /* report the parsed value; *nextword is not assigned until the next line */
+ DEBUG_PRINTF("is a dereferenced literal value (%lu)\n", l);
*nextword = l & 0xffff;
*nextwordused += 1;
- retval = 0x10 | register_enumerate_(reg);
- goto done;
+ return 0x1e;
+ } else if (errno == ERANGE) {
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(ERANGE));
+ return -1;
}
- l = strtoul(operand, &ep, 0);
- DEBUG_NOTIFY("dereferenced literal value %lu...\n", l);
- *nextword = l & 0xffff;
+ /* not a number? try a label */
+ if (label_list_find_addr(ll, operand, nextword)) {
+ DEBUG_PRINTF("(deferred label resolution)\n");
+ *nextwordused += 1;
+ return -2;
+ }
+ DEBUG_PRINTF("is a dereferenced label\n");
*nextwordused += 1;
- retval = 0x1e;
- goto done;
+ return 0x1e;
}
- if (strcasecmp(operand, "POP") == 0) {
- DEBUG_NOTIFY("POP\n");
- retval = 0x18;
- goto done;
- }
- if (strcasecmp(operand, "PUSH") == 0) {
- DEBUG_NOTIFY("PUSH\n");
- retval = 0x19;
- goto done;
- }
- if (strcasecmp(operand, "PEEK") == 0) {
- DEBUG_NOTIFY("PEEK\n");
- retval = 0x1a;
- goto done;
- }
- if (strcasecmp(operand, "SP") == 0) {
- DEBUG_NOTIFY("sp register\n");
- retval = 0x1b;
- goto done;
- }
- if (strcasecmp(operand, "PC") == 0) {
- DEBUG_NOTIFY("pc register\n");
- retval = 0x1c;
- goto done;
- }
- if (strcasecmp(operand, "O") == 0) {
- DEBUG_NOTIFY("o register\n");
- retval = 0x1d;
- goto done;
- }
+ /* left with a literal or a label, then */
+ errno = 0;
l = strtoul(operand, &ep, 0);
+ /* both must hold: with || every bare label would be taken as the literal 0 */
+ if (errno == 0
+ && (*operand && (*ep == '\0')) ) {
+ if (l > 0xffff) {
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
- if (operand && *ep == '\0') {
- DEBUG_NOTIFY("literal value %lu...\n", l);
+ DEBUG_PRINTF("is literal value (%lu)\n", l);
if (l < 0x20) {
- retval = l + 0x20;
- goto done;
- } else {
- *nextword = l & 0xffff;
- *nextwordused += 1;
- retval = 0x1f;
- goto done;
+ return l + 0x20;
}
+
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+ return 0x1f;
}
+ /* a number too big for unsigned long is an error, not a label */
+ if (errno == ERANGE) {
+ DEBUG_PRINTF("is out of range\n");
+ fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(ERANGE));
+ return -1;
+ }
/* try to populate nextword with label address */
if (label_list_find_addr(ll, operand, nextword)) {
- DEBUG_NOTIFY("currently-unknown label...\n");
+ DEBUG_PRINTF("(deferred label resolution)\n");
/* assume non-small literal value */
*nextwordused += 1;
- goto done;
+ return -2;
}
- DEBUG_NOTIFY("label '%s' 0x%02hx\n", operand, *nextword);
+ DEBUG_PRINTF("is label '%s' (0x%02hx)\n", operand, *nextword);
if (*nextword < 0x20 && allow_short_labels) {
- DEBUG_NOTIFY("small value label win\n");
- retval = (0x20 + *nextword) & 0x3f;
- goto done;
+ DEBUG_PRINTF("small value label win\n");
+ return (0x20 + *nextword) & 0x3f;
}
- retval = 0x1f;
*nextwordused += 1;
-
-done:
- free(o);
- return retval;
+ return 0x1f;
}
static inline
/* try to generate bytecode for an instruction */
static
-void instr_bytecodify_(struct label_list_ *ll, struct instruction_ *i, unsigned int allow_short_labels) {
+int instr_assemble_(struct label_list_ *ll, struct instruction_ *i, unsigned int allow_short_labels) {
unsigned int nwu = 0; /* number of words used */
unsigned int incomplete = 0;
int bits;
struct operand_ *o = i->operands;
- DEBUG_NOTIFY("TRACE: codifying %s%s'%s'...", i->label ? i->label : "", i->label ? ":" : "", i->opcode);
+ if (verbose_ > 2) {
+ printf("%s: assembling ", __func__);
+ instruction_print_(i,1);
+ printf("\n");
+ }
if (i->ready) {
- /* already codified */
- return;
+ /* already assembled, nothing to do */
+ return 0;
}
/* special case DAT */
/* count total length of data.. */
/* realloc instruction */
/* populate words */
- return;
+ return 0;
}
/* start with opcode bits */
bits = opcode_bits_(i->opcode);
if (bits < 0) {
- fprintf(stderr, "unrecognized instruction '%s'\n", i->opcode);
- return;
+ fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
+ for (o = i->operands; o; o = o->next)
+ fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
+ fprintf(stderr, "'\n");
+ return -1;
}
i->instr_words[0] |= 0x0f & bits;
if ((bits & 0x0f) == 0) {
bits = nbi_opcode_bits_(i->opcode);
if (bits < 0) {
- fprintf(stderr, "internal error: missing instruction in nbi opcode table\n");
- return;
+ fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
+ exit(EX_SOFTWARE);
}
} else {
if (o == NULL) {
fprintf(stderr, "'%s' requires more operands\n", i->opcode);
- return;
+ return -1;
}
bits = value_bits_(ll, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
- if (bits < 0) {
- DEBUG_NOTIFY("TRACE: unresolved label\n");
+ if (bits == -1) {
+ fprintf(stderr, "couldn't assemble instruction\n");
+ return -1;
+ } else if (bits == -2) {
+ DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
/* keep going, but don't finalize until we can calculate label address */
incomplete = 1;
bits = 0;
if (o == NULL) {
fprintf(stderr, "'%s' requires more operands\n", i->opcode);
- return;
+ return -1;
}
bits = value_bits_(ll, o->operand, i->instr_words + nwu + 1, &nwu, allow_short_labels);
- if (bits < 0) {
- DEBUG_NOTIFY("TRACE: unresolved label\n");
+ if (bits == -1) {
+ fprintf(stderr, "couldn't assemble instruction\n");
+ return -1;
+ } else if (bits == -2) {
+ DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
/* keep going, but don't finalize until we can calculate label address */
incomplete = 1;
bits = 0;
o = o->next;
i->instr_words[0] |= (bits & 0x3f) << 10;
- /* counting labels as words, we now know the maximum instruction length */
- /* if label is < 0x20, it can take up less space */
+ if (o != NULL) {
+ fprintf(stderr, "too many operands\n");
+ return -1;
+ }
+
+ /* counting labels as words, we now know at least the maximum instruction length */
i->length = nwu + 1;
- DEBUG_NOTIFY("instruction words: [%u]", i->length);
+ DEBUG_PRINTF("instruction words: [%u]", i->length);
for (bits = 0; bits <= (int)nwu; bits++)
- DEBUG_NOTIFY(" 0x%04x", i->instr_words[bits]);
+ DEBUG_PRINTF(" %04x", i->instr_words[bits]);
if (incomplete) {
- DEBUG_NOTIFY(" (preliminary)");
+ DEBUG_PRINTF(" (preliminary)");
} else {
i->ready = 1;
}
- DEBUG_NOTIFY("\n");
+ DEBUG_PRINTF("\n");
+
+ return 0;
}
-/* thish should grow buffer to fit huge linesh, but I jusht don't care right now, hic */
+/* parse_stream_
+ * read lines from stream f
+ * break each line into parts, populate parts into structures
+ * src names the stream in diagnostics, which are prefixed "src:line:"
+ * returns 0 on success, -1 if the stream had a read error or any line was rejected
+ */
static
-int parse_stream_(FILE *f, struct instruction_list_ **il, struct label_list_ **ll, unsigned int allow_short_labels) {
+int parse_stream_(FILE *f, const char *src, struct instruction_list_ **il, struct label_list_ **ll, unsigned int allow_short_labels) {
struct instruction_ *instr, **instr_list_entry;
- char buf[(1<<14)];
+ unsigned int line = 0;
+ int retval = 0;
+ char buf[0x4000];
- buf[sizeof buf - 1] = '\0';
+
+ /*
+ fgets reads at most sizeof buf - 1 characters and then stores a NUL, so the
+ last byte of buf can only ever become NUL and cannot signal truncation.
+ Watch the byte before it instead: it holds something other than NUL or
+ newline only when a line filled the buffer without reaching its end.
+ */
+ buf[sizeof buf - 2] = '\0';
while (fgets(buf, sizeof buf, f)) {
+ line++;
+
- if (buf[sizeof buf - 1] != '\0') {
- fprintf(stderr, "input buffer exhausted\n");
+ if (buf[sizeof buf - 2] != '\0' && buf[sizeof buf - 2] != '\n') {
+ fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
+ retval = -1;
break;
}
if (buf_tokenize_(buf, &instr)) {
- fprintf(stderr, "trouble tokenizing input\n");
+ fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
+ retval = -1;
break;
}
if (instr) {
/* add to list of instructions */
if (instr_list_insert(il, instr)) {
- fprintf(stderr, "could not populate instruction list\n");
+ fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
+ /* the entry lookup below assumes the insert succeeded */
+ retval = -1;
+ break;
}
instr_list_entry = (*il)->instr + (*il)->entries - 1;
- DEBUG_NOTIFY("TRACE: verify %s == %s\n", (*instr_list_entry)->opcode, instr->opcode);
+
if (instr->label) {
if (label_list_find_instr(*ll, instr->label)) {
- fprintf(stderr, "duplicate label\n");
+ fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
+ retval = -1;
break;
}
if (label_list_insert(ll, instr_list_entry)) {
- fprintf(stderr, "could not populate label list\n");
+ fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
+ retval = -1;
+ break;
}
label_addr_calculate_(*il, *ll);
}
- instr_bytecodify_(*ll, instr, allow_short_labels);
+ /* nonzero is a hard error; keep reading so later lines are still checked */
+ if (instr_assemble_(*ll, instr, allow_short_labels)) {
+ fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
+ retval = -1;
+ }
}
}
if (ferror(f)) {
return -1;
}
- return 0;
+ return retval;
}
+/* assemble_check_
+ * make a full pass over instruction list to resolve labels
+ */
static
int assemble_check_(struct instruction_list_ *il, struct label_list_ *ll, unsigned int allow_short_labels) {
int retval = 0;
size_t x;
- DEBUG_NOTIFY(" final pass of codifier...\n");
+ DEBUG_PRINTF(" final pass of assembler...\n");
for (x = 0; x < il->entries; x++) {
- instr_bytecodify_(ll, il->instr[x], allow_short_labels);
+ retval |= instr_assemble_(ll, il->instr[x], allow_short_labels);
+ if (retval) {
+ fprintf(stderr, "instruction failed to assemble\n");
+ }
}
- VERBOSE_NOTIFY("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
+ VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
for (x = 0; x < ll->entries; x++) {
if (! ll->label[x].ready)
retval |= -1;
- VERBOSE_NOTIFY("%3s0x%04x %-32s ",
- ll->label[x].ready ? "" : "*",
- ll->label[x].addr,
- ll->label[x].label);
if (verbose_) {
+ printf("%3s0x%04x %-32s ",
+ ll->label[x].ready ? "" : "*",
+ ll->label[x].addr,
+ ll->label[x].label);
instruction_print_(*(ll->label[x].instr), 0);
printf("\n");
}
}
- VERBOSE_NOTIFY("\n");
+ VERBOSE_PRINTF("\n");
if (retval)
fprintf(stderr, "some labels could not be resolved\n");
total_words += instr->length;
}
- fprintf(stderr, "wrote 0x%04zx instructions as 0x%04zx words\n",
+ fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
+ dryrun_ ? "assembled" : "wrote",
i,
total_words);
+
return 0;
}
continue;
}
- VERBOSE_NOTIFY("assembling '%s'...\n", filename);
- parse_stream_(f, &il_, &ll_, allow_short_labels);
+ VERBOSE_PRINTF("assembling '%s'...\n", filename);
+ parse_stream_(f, filename, &il_, &ll_, allow_short_labels);
fclose(f);
}
} else {
- VERBOSE_NOTIFY("assembling '%s'...\n", "stdin");
- parse_stream_(stdin, &il_, &ll_, allow_short_labels);
+ VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
+ parse_stream_(stdin, "-", &il_, &ll_, allow_short_labels);
}
if (assemble_check_(il_, ll_, allow_short_labels)) {