#include <unistd.h>
#include <stdio.h>
#include <string.h>
+#include <strings.h>
#include <errno.h>
#include <sysexits.h>
#include <assert.h>
#include "dcpu16.h"
+#include "common.h"
/*
* quick and dirty assembler for dcpu16
* Justin Wind <justin.wind@gmail.com>
* 2012 04 07 - implementation started
* 2012 04 10 - functional
+ * 2012 04 16 - support dat statements
*
* TODO
* needs ability to specify location for code or data
+ * needs ability to specify label as relative to another label
* short labels not correctly computed
+ * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly
*/
static const char * const src_id_ = "$Id$";
/* keep an array of instructions as we read them in */
struct instruction_ {
+ size_t src_line;
char *label; /* set if a label points here */
char *opcode; /* tokenized instruction text */
struct operand_ *operands; /* list of operands */
};
-/* routines to support generic grow-able arrays */
-
-struct dynamic_array_ {
- size_t entry_size;
- size_t grow_size;
- size_t allocated;
- size_t entries;
- void *a;
-};
-
-#define DYNARRAY_ITEM(da, index) ( (char *)(da).a + ( (da).entry_size * index ) )
-
-/* allocate and initialize a new generic dynamic array */
-static
-struct dynamic_array_ *dynarray_new_(size_t entry_size, size_t grow_size) {
- struct dynamic_array_ *da;
-
- if (entry_size == 0 || grow_size == 0) {
- fprintf(stderr, "%s: internal error: sizes cannot be zero\n", __func__);
- exit(EX_SOFTWARE);
- }
-
- da = calloc(1, sizeof *da);
- if (da == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- return NULL;
- }
-
- da->entry_size = entry_size;
- da->grow_size = grow_size;
-
- da->a = malloc(da->entry_size * da->grow_size);
- if (da->a == NULL) {
- fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
- }
-
- da->allocated = grow_size;
-
- DEBUG_PRINTF("allocated new dynarray:%p a:%p entry_size:%zu\n", da, da->a, da->entry_size);
-
- return da;
-}
-
-/* copy item onto end of array */
-static
-void *dynarray_add_(struct dynamic_array_ *da, void *item) {
- void *dst;
-
- /* make room, make room */
- if (da->entries == da->allocated) {
- size_t new_allocated = da->allocated + da->grow_size;
- void *tmp_ptr = realloc(da->a, new_allocated * da->entry_size);
- if (tmp_ptr == NULL) {
- fprintf(stderr, "%s():%s\n", "realloc", strerror(errno));
- return NULL;
- }
- da->a = tmp_ptr;
- da->allocated = new_allocated;
-
- DEBUG_PRINTF("grew dynarray:%p\n", da);
- }
-
- dst = DYNARRAY_ITEM(*da, da->entries);
- memcpy(dst, item, da->entry_size);
-
- da->entries++;
-
- DEBUG_PRINTF("added dynarray:%p entry:%zu item:%p\n", da, da->entries, item);
-
- return dst;
-}
-
-
/* locate and return the label entry matching name */
static
-struct label_ *label_find_(struct dynamic_array_ *labels, char *name) {
+struct label_ *label_find_(struct dynamic_array *labels, char *name) {
size_t x;
for (x = 0; x < labels->entries; x++) {
/* if a label has a validly-calculated address, fetch it */
static
-int label_addr_(struct dynamic_array_ *labels, char *name, DCPU16_WORD *addr) {
+int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
struct label_ *l;
if ( (l = label_find_(labels, name)) == NULL )
/* attempt to determine the addresses of all labels */
static
-void label_addr_calculate_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels) {
+void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
size_t i;
+ /* idea: label1:label2 - calculated as offset between labels */
+
/* for each label.. */
for (i = 0; i < labels->entries; i++) {
struct label_ *l;
l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
+ DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label);
+
+#if 0
+force full resolution while debugging
/* if it's already calculated, great. */
if (l->ready)
continue;
+#endif
/*
* starting at the instruction for this label,
* until we get to the start or a known prior label address.
* update our label with the freshly calculated addr
*/
- for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
- instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
- instr--) {
+ /* first fetch the instruction associated with the label we want to know about.. */
+ /* the addr of this instruction will be whatever follows all the preceding instructions */
+ /* so back up one before counting instruction lengths... */
+ instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
+ /* is it the first one? */
+ if (instr == (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0))
+ break;
+
+ instr--;
+
+ while (instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) {
+ if ((*instr)->ready == 0)
+ DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode);
word_count += (*instr)->length;
+ DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode, (*instr)->length);
+
/* have we come across an instruction which a label points to?
it should already be calculated, so just add that on and be done */
if ((*instr)->label
word_count += addr;
break;
}
+ instr--;
}
l->addr = word_count;
l->ready = 1;
}
}
-static
-void instr_free_(struct instruction_ *i) {
- if (i->label)
- free(i->label);
- if (i->opcode)
- free(i->opcode);
- while (i->operands) {
- struct operand_ *o = i->operands;
-
- i->operands = o->next;
- free(o);
- }
-
- free(i);
-}
/* generate the nibble for a given basic opcode */
static
*d = *s;
}
+
/* value_bits_
* generate the six bits for a given operand string
* returns -1 if it could not parse the operand
* notes: nextword may be overwritten even if it's not used in final instruction
*/
static
-int value_bits_(struct dynamic_array_ *labels, char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
+int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
static char *operand = NULL;
static size_t operand_sz = 0;
char *reg;
char *constant;
+ DEBUG_PRINTFQ("is multipart.. ");
+
/* eat the plus */
*ep = '\0';
ep++;
*nextwordused += 1;
DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
return 0x10 | register_enumerate_(*reg);
- } else if (errno) {
+ } else if (errno == ERANGE) {
+#if 0
+oh, right, labels fall through
DEBUG_PRINTFQ("is out of range\n");
fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
return -1;
+#endif
}
/* what? still here? assume it's a label, I guess */
int r;
if (with_label)
- r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
- else
- r = printf("%3s", i->opcode);
+ r = printf("%-16s ", i->label ? i->label : "");
+
+ r = printf("%3s", i->opcode ? i->opcode : "");
for (o = i->operands; o; o = o->next)
r += printf(" %s%s", o->operand, o->next ? "," : "");
return r;
}
-/* parse an instruction out of buf, create new instruction struct if seemingly valid */
-/* does not actually check if instruction is valid yet */
-/* buf must be 0-terminated */
+/* tokenize_line_
+ * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
+ * [label] instruction [operand[,operand[,...]]]
+ * Does no validation of contents of any of these tokens, as of yet.
+ * does not clean up after itself if a malloc fails
+ */
static
-int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
- const char const *sep = " \t\n";
+int tokenize_line_(char *line, struct instruction_ **next_instr) {
+ const char const *whitespace = " \t\n";
+ const char const *quotes = "\"'`";
struct instruction_ *instr = NULL;
- char *label = NULL,
- *opcode = NULL,
- *operand = NULL;
-
- char *x,
- *y,
- *st;
+ char *x, *st, *qt;
+ char *label, *opcode;
+ struct operand_ *operand_list = NULL;
+ struct operand_ **operand_tail = &operand_list;
+ size_t instr_words_needed = 0;
- assert(buf != NULL);
- assert(next_instr != NULL);
+ assert(line);
+ assert(next_instr);
*next_instr = NULL;
- /* kill comments */
- if ((x = strchr(buf, ';')) != NULL)
- *x = '\0';
- /* kill leading whitespace */
- buf += strspn(buf, " \t\n");
- /* kill trailing whitespace */
- if (*buf) {
- x = buf + strlen(buf);
- while (strchr(" \t\n", *x)) {
- *x = '\0';
- x--;
- }
- }
-
- if ((x = strrchr(buf, '\n')) != NULL)
- *x = '\0';
+ /* strip leading whitespace */
+ line += strspn(line, whitespace);
+ if (*line == '\0')
+ return 0;
- /* determine if first token is label, opcode, or we just have a blank line to ignore */
- x = strtok_r(buf, sep, &st);
+ /* set first bare ';' to '\0', thus isolating any comments */
+ /* here we only care about the side-effect of truncating the first separator character */
+ (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
+ /* we don't care if there was an unmatched quote at this point, let's see what happens */
+ if (*line == '\0')
+ return 0;
- /* empty line? nothing to do here. */
- if (x == NULL)
+ /* carve off the first token, determine if it is a label */
+ x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
+ if (x == NULL || *x == '\0')
return 0;
+ if (qt) {
+ /* labels could contain an unmatched quote character, I guess? */
+ qt = NULL;
+ }
+
+ /* we have something, try to make sense of what it is */
+
+#ifdef NON_SPEC_LABELS
+ /* I want my labels like 'label:' */
+ if ( *(x + strlen(line) - 1) == ':' ) {
+ *(x + strlen(line) - 1) = '\0';
+ DEBUG_PRINTF("label: %s\n", x);
-#ifdef OTHER_LABELS
- /* labels end with :, otherwise its an opcode */
- y = x + strlen(x) - 1;
- if (*y == ':') {
- *y = '\0';
label = x;
- opcode = strtok_r(NULL, sep, &st);
+
+ opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
+ } else {
+ label = NULL;
+ opcode = x;
}
-#else /* OTHER_LABELS */
- /* labels.. begin? with ':' ? okay, I guess. Whatever. */
- /* otherwise, it's an opcode */
+#endif /* NON_SPEC_LABELS */
+
+ /* spec gives example of labels as ':label' */
if (*x == ':') {
- label = x + 1;
- opcode = strtok_r(NULL, sep, &st);
+ *x = '\0';
+ x++;
+ label = x;
+ opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
} else {
label = NULL;
opcode = x;
}
-#endif /* OTHER_LABELS */
+ /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
- if (opcode) {
- operand = st;
- }
+ if (opcode && *opcode) {
+ /* if we have an opcode, we'll need at least one word to compile instruction */
+ instr_words_needed++;
- /* extra room for assembled words */
- instr = calloc(1, 3 + sizeof *instr);
- if (instr == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- return -1;
- }
+ /* build a list of operands to hang off this instruction */
+ while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
+ struct operand_ *new_operand;
+ char *y;
- instr->label = label ? strdup(label) : NULL;
- instr->opcode = opcode ? strdup(opcode) : NULL;
+ /* trim whitespaces */
+ x += strspn(x, whitespace);
- if (operand) {
- struct operand_ **o_next = &instr->operands;
+ if (*x) {
+ for (y = x + strlen(x) - 1; *y; y--) {
+ if (strchr(whitespace, *y)) {
+ *y = '\0';
+ }
+ }
+ }
+ /* nothing left? */
+ if (*x == '\0') {
+ fprintf(stderr, "null operand encountered\n");
+ return -1;
+ }
- for (x = strtok_r(operand, ",", &st);
- x;
- x = strtok_r(NULL, ",", &st) ) {
- *o_next = malloc(3 + sizeof **o_next); /* FIXME: handle this on the fly later */
+ DEBUG_PRINTF("tokenized operand '%s'\n", x);
- if (*o_next == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- instr_free_(instr);
+ new_operand = malloc(sizeof *new_operand);
+ if (new_operand == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
return -1;
}
- /* trim */
- x += strspn(x, " \t\n");
- if (*x) {
- y = x + strlen(x) - 1;
- while (strchr(" \t\n", *y)) {
- *y = '\0';
- y--;
- }
+ new_operand->operand = strdup(x);
+ if (new_operand->operand == NULL) {
+ fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
+ return -1;
+ }
+
+ new_operand->next = NULL;
+
+ if (strchr(quotes, x[0])) {
+ /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
+ instr_words_needed += strlen(x) - 1;
}
+ instr_words_needed++;
- (*o_next)->operand = strdup(x);
- (*o_next)->next = NULL;
- o_next = &((*o_next)->next);
+ *operand_tail = new_operand;
+ operand_tail = &(*operand_tail)->next;
}
}
+ DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed);
+
+ instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
+ if (instr == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
+
+ if (label) {
+ instr->label = strdup(label);
+ if (instr->label == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
+ } else {
+ label = NULL;
+ }
+
+ if (opcode) {
+ instr->opcode = strdup(opcode);
+ if (instr->opcode == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
+ } else {
+ opcode = NULL;
+ }
+
+ instr->operands = operand_list;
+
*next_instr = instr;
return 0;
}
/* try to generate bytecode for an instruction */
+/* returns -1 on unrecoverable error */
static
-int instr_assemble_(struct dynamic_array_ *labels, struct instruction_ *i, unsigned int allow_short_labels) {
+int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
unsigned int nwu = 0; /* number of words used */
unsigned int incomplete = 0;
int bits;
struct operand_ *o = i->operands;
if (opt_.verbose > 2) {
- printf("%s: assembling %p ", __func__, i);
+ printf("%s: assembling %p ", __func__, (void *)i);
instruction_print_(i, 1);
- printf("\n");
+ printf("(line %zu)\n", i->src_line);
}
+#if 0
+while debugging, always reassemble
if (i->ready) {
/* already assembled, nothing to do */
return 0;
}
+#endif
+
+ if (i->opcode == NULL) {
+ assert(i->label);
+ assert(i->operands == NULL);
+ /* just a label, move along */
+ i->length = 0;
+ i->ready = 1;
+ return 0;
+ }
/* special case DAT */
if (strncasecmp(i->opcode, "DAT", 3) == 0) {
- /* just dump operands into words, I guess */
- fprintf(stderr, "FIXME unhandled raw data\n");
- /* count total length of data.. */
- /* realloc instruction */
- /* populate words */
+ DEBUG_PRINTF("processing DAT...\n");
+
+ i->length = 0;
+
+ for ( /* */ ; o; o = o->next) {
+ size_t j, dat_len;
+ char *x;
+ unsigned long l;
+
+ DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, (void *)o->next);
+
+ /* is this a string? */
+ /* does it start with a quote, and end with the same quote? */
+ if ( (x = strchr("\"'`", o->operand[0])) ) {
+ dat_len = strlen(o->operand) - 1;
+ if (o->operand[dat_len] == *x) {
+ /* it is a string */
+ DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
+
+ for (j = 0, x = o->operand + 1;
+ j < dat_len - 1;
+ j++, x++) {
+ i->instr_words[i->length] = *x;
+ i->length++;
+ }
+ /* Note that strings in DAT do not include their zero-terminators */
+ /* specify as 'DAT "string", 0' */
+ }
+ continue;
+ }
+
+ /* is this a number? */
+ char *ep;
+ errno = 0;
+ l = strtoul(o->operand, &ep, 0);
+ if (errno == 0
+ && (*o->operand && (*ep == '\0')) ) {
+ /* conversion succeeded */
+ if (l > 0xffff) {
+ fprintf(stderr, "value '%lu' out of range\n", l);
+ return -1;
+ }
+ i->instr_words[i->length] = l;
+ i->length++;
+ continue;
+ }
+
+ /* otherwise assume it's a label, even if we don't know what it is */
+ if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
+ DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand);
+ incomplete = 1;
+ }
+ i->length++;
+ }
+
+ if (incomplete) {
+ DEBUG_PRINTF("pending label address\n");
+ } else {
+ i->ready = 1;
+ }
+
return 0;
- }
+ } /* end of DAT */
/* start with opcode bits */
bits = opcode_bits_(i->opcode);
return -1;
}
- bits = value_bits_(labels, o->operand, i->instr_words + nwu + 1, &nwu, allow_short_labels);
+ bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
if (bits == -1) {
fprintf(stderr, "couldn't assemble instruction\n");
return -1;
* break each line into parts, populate parts into structures
*/
static
-int parse_stream_(FILE *f, const char *src, struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
+int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
struct instruction_ *instr, **instr_list_entry;
unsigned int line = 0;
int retval = 0;
break;
}
- if (buf_tokenize_(buf, &instr)) {
+ if (tokenize_line_(buf, &instr)) {
fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
retval = -1;
break;
}
if (instr) {
+ instr->src_line = line;
/* add to list of instructions */
- instr_list_entry = dynarray_add_(instructionps, &instr);
+ instr_list_entry = dynarray_add(instructionps, &instr);
if (instr_list_entry == NULL) {
fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
break;
break;
}
- if (dynarray_add_(labels, &new_label) == NULL) {
+ if (dynarray_add(labels, &new_label) == NULL) {
fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
break;
}
label_addr_calculate_(instructionps, labels);
}
- instr_assemble_(labels, instr, allow_short_labels);
+ if (instr_assemble_(labels, instr, allow_short_labels)) {
+ fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
+ break;
+ }
}
}
if (ferror(f)) {
* make a full pass over instruction list to resolve labels
*/
static
-int assemble_check_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
+int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
int retval = 0;
size_t x;
+ /* fixing short labels .... */
+ /* by here we have our list of instructions and their maximum instruction lengths */
+ /* and we have a list of addresses, based on those maximum lengths */
+ /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
+ /* and reassemble.. */
+ /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
+ /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
+
+ /* try this? keep another list of locations a label address is used */
+ /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
+
DEBUG_PRINTF(" final pass of assembler...\n");
for (x = 0; x < instructionps->entries; x++) {
struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
- retval |= instr_assemble_(labels, *instrp, allow_short_labels);
+ retval = instr_assemble_(labels, *instrp, allow_short_labels);
if (retval) {
fprintf(stderr, "instruction %zu failed to assemble\n", x);
+ return retval;
+ }
+ if (! (*instrp)->ready) {
+ fprintf(stderr, "instruction not resolvable\n");
+ return -1;
}
}
return retval;
}
+/* output_
+ * write assembled words to named file
+ */
static
-int output_(struct dynamic_array_ *instructionps, const char *filename) {
+int output_(struct dynamic_array *instructionps, const char *filename) {
FILE *of = NULL;
struct instruction_ **instrp;
size_t i, r, total_words = 0;
return 0;
}
-static struct dynamic_array_ *instructionps_;
-static struct dynamic_array_ *labels_;
+static struct dynamic_array *instructionps_;
+static struct dynamic_array *labels_;
int main(int argc, char *argv[]) {
const char *out_filename = NULL;
out_filename = out_filename_default_;
/* init tables */
- instructionps_ = dynarray_new_(sizeof (struct instruction_ *), 1024);
- labels_ = dynarray_new_(sizeof(struct label_), 256);
+ instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
+ labels_ = dynarray_new(sizeof(struct label_), 256);
if (instructionps_ == NULL
|| labels_ == NULL) {
fprintf(stderr, "failed to initialize\n");
}
VERBOSE_PRINTF("assembling '%s'...\n", filename);
- parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
-
+ c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
fclose(f);
+ if (c)
+ break;
}
} else {
VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
- parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
+ c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
+ }
+ if (c) {
+ fprintf(stderr, "could not parse input, aborting\n");
+ exit(EX_DATAERR);
}
if (assemble_check_(instructionps_, labels_, allow_short_labels)) {