#include <assert.h>
#include "dcpu16.h"
+#include "common.h"
/*
* quick and dirty assembler for dcpu16
* Justin Wind <justin.wind@gmail.com>
* 2012 04 07 - implementation started
* 2012 04 10 - functional
+ * 2012 04 16 - support dat statements
*
* TODO
* needs ability to specify location for code or data
};
-/* routines to support generic grow-able arrays */
-
-struct dynamic_array_ {
- size_t entry_size;
- size_t grow_size;
- size_t allocated;
- size_t entries;
- void *a;
-};
-
-#define DYNARRAY_ITEM(da, index) ( (char *)(da).a + ( (da).entry_size * index ) )
-
-/* allocate and initialize a new generic dynamic array */
-static
-struct dynamic_array_ *dynarray_new_(size_t entry_size, size_t grow_size) {
- struct dynamic_array_ *da;
-
- if (entry_size == 0 || grow_size == 0) {
- fprintf(stderr, "%s: internal error: sizes cannot be zero\n", __func__);
- exit(EX_SOFTWARE);
- }
-
- da = calloc(1, sizeof *da);
- if (da == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- return NULL;
- }
-
- da->entry_size = entry_size;
- da->grow_size = grow_size;
-
- da->a = malloc(da->entry_size * da->grow_size);
- if (da->a == NULL) {
- fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
- }
-
- da->allocated = grow_size;
-
- DEBUG_PRINTF("allocated new dynarray:%p a:%p entry_size:%zu\n", da, da->a, da->entry_size);
-
- return da;
-}
-
-/* copy item onto end of array */
-static
-void *dynarray_add_(struct dynamic_array_ *da, void *item) {
- void *dst;
-
- /* make room, make room */
- if (da->entries == da->allocated) {
- size_t new_allocated = da->allocated + da->grow_size;
- void *tmp_ptr = realloc(da->a, new_allocated * da->entry_size);
- if (tmp_ptr == NULL) {
- fprintf(stderr, "%s():%s\n", "realloc", strerror(errno));
- return NULL;
- }
- da->a = tmp_ptr;
- da->allocated = new_allocated;
-
- DEBUG_PRINTF("grew dynarray:%p\n", da);
- }
-
- dst = DYNARRAY_ITEM(*da, da->entries);
- memcpy(dst, item, da->entry_size);
-
- da->entries++;
-
- DEBUG_PRINTF("added dynarray:%p entry:%zu item:%p\n", da, da->entries, item);
-
- return dst;
-}
-
-
/* locate and return the label entry matching name */
static
-struct label_ *label_find_(struct dynamic_array_ *labels, char *name) {
+struct label_ *label_find_(struct dynamic_array *labels, char *name) {
size_t x;
for (x = 0; x < labels->entries; x++) {
/* if a label has a validly-calculated address, fetch it */
static
-int label_addr_(struct dynamic_array_ *labels, char *name, DCPU16_WORD *addr) {
+int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
struct label_ *l;
if ( (l = label_find_(labels, name)) == NULL )
/* attempt to determine the addresses of all labels */
static
-void label_addr_calculate_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels) {
+void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
size_t i;
/* for each label.. */
instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
instr--) {
+ if ((*instr)->ready)
+ DEBUG_PRINTF("%s: instr not ready\n", __func__);
word_count += (*instr)->length;
/* have we come across an instruction which a label points to?
}
}
-static
-void instr_free_(struct instruction_ *i) {
- if (i->label)
- free(i->label);
- if (i->opcode)
- free(i->opcode);
- while (i->operands) {
- struct operand_ *o = i->operands;
-
- i->operands = o->next;
- free(o);
- }
-
- free(i);
-}
/* generate the nibble for a given basic opcode */
static
*d = *s;
}
+
/* value_bits_
* generate the six bits for a given operand string
* returns -1 if it could not parse the operand
* notes: nextword may be overwritten even if it's not used in final instruction
*/
static
-int value_bits_(struct dynamic_array_ *labels, char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
+int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
static char *operand = NULL;
static size_t operand_sz = 0;
return r;
}
-/* parse an instruction out of buf, create new instruction struct if seemingly valid */
-/* does not actually check if instruction is valid yet */
-/* buf must be 0-terminated */
+/* buf_tokenize_
+ * Parses the zero-terminated line of input 'buf' into a newly-allocated struct instruction_.
+ * [label] opcode [operand[,operand[,...]]]
+ * Does not yet validate if labels, opcodes, or operands are valid...
+ */
static
int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
const char const *sep = " \t\n";
+ const char const *quot = "'`\"";
struct instruction_ *instr = NULL;
+ struct operand_ *operand_list = NULL;
char *label = NULL,
*opcode = NULL,
*operand = NULL;
-
char *x,
*y,
- *st;
+ *st, *qt;
+ size_t instr_words_needed = 1;
assert(buf != NULL);
assert(next_instr != NULL);
*next_instr = NULL;
- /* kill comments */
- if ((x = strchr(buf, ';')) != NULL)
- *x = '\0';
/* kill leading whitespace */
buf += strspn(buf, " \t\n");
- /* kill trailing whitespace */
- if (*buf) {
- x = buf + strlen(buf);
- while (strchr(" \t\n", *x)) {
- *x = '\0';
- x--;
- }
- }
- if ((x = strrchr(buf, '\n')) != NULL)
+ /* kill trailing whitespace */
+ for (x = buf + strlen(buf); *x && strchr(sep, *x); x--)
*x = '\0';
- /* determine if first token is label, opcode, or we just have a blank line to ignore */
- x = strtok_r(buf, sep, &st);
+ /* split on first non-quoted ';', ignore following comment */
+ x = strqtok_r(buf, ";", '\\', quot, &qt, &st);
+ if (x == NULL)
+ return 0;
+ if (qt) {
+ fprintf(stderr, "unmatched %c-quote\n", *qt);
+ return -1;
+ }
- /* empty line? nothing to do here. */
+ /* determine if first token is label, opcode, or we just have a blank line to ignore */
+ x = strqtok_r(x, sep, '\\', quot, &qt, &st);
if (x == NULL)
return 0;
+ if (qt) {
+ fprintf(stderr, "unmatched %c-quote\n", *qt);
+ return -1;
+ }
-#ifdef OTHER_LABELS
+/* I want c-style labels in my asm, but example in spec uses : in prefix rather than postfix */
+#ifdef NON_SPEC_LABELS
/* labels end with :, otherwise its an opcode */
y = x + strlen(x) - 1;
if (*y == ':') {
*y = '\0';
label = x;
- opcode = strtok_r(NULL, sep, &st);
+ opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
+ } else {
+ label = NULL;
+ opcode = x;
}
-#else /* OTHER_LABELS */
+#else /* NON_SPEC_LABELS */
/* labels.. begin? with ':' ? okay, I guess. Whatever. */
/* otherwise, it's an opcode */
if (*x == ':') {
label = x + 1;
- opcode = strtok_r(NULL, sep, &st);
+ opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
} else {
label = NULL;
opcode = x;
}
-#endif /* OTHER_LABELS */
+#endif /* NON_SPEC_LABELS */
if (opcode) {
operand = st;
}
- /* extra room for assembled words */
- instr = calloc(1, 3 + sizeof *instr);
- if (instr == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- return -1;
- }
-
- instr->label = label ? strdup(label) : NULL;
- instr->opcode = opcode ? strdup(opcode) : NULL;
+ /*
+ While normal instructions just have comma-separated operands,
+ DAT can be followed by comma-separated list of:
+ label, to be resolved to address
+ value, like 0xffff
+ string, "quoted", characters to be rendered into low-byte of words
+ */
if (operand) {
- struct operand_ **o_next = &instr->operands;
+ struct operand_ **o_next = &operand_list;
- for (x = strtok_r(operand, ",", &st);
+ for (x = strqtok_r(operand, ",", '\\', quot, &qt, &st);
x;
- x = strtok_r(NULL, ",", &st) ) {
- *o_next = malloc(3 + sizeof **o_next); /* FIXME: handle this on the fly later */
+ x = strqtok_r(NULL, ",", '\\', quot, &qt, &st) ) {
+
+ /* trim leading whitespace */
+ x += strspn(x, " \t\n");
+ if (*x == '\0') {
+ fprintf(stderr, "encountered empty operand\n");
+ return -1;
+ }
+ /* trim trailing whitespace */
+ y = x + strlen(x) - 1;
+ while (strchr(" \t\n", *y)) {
+ *y = '\0';
+ y--;
+ }
+
+ /* new operand to append to list */
+ *o_next = malloc(sizeof **o_next);
if (*o_next == NULL) {
fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- instr_free_(instr);
return -1;
}
- /* trim */
- x += strspn(x, " \t\n");
- if (*x) {
- y = x + strlen(x) - 1;
- while (strchr(" \t\n", *y)) {
- *y = '\0';
- y--;
- }
- }
+ /* assume an operand uses one word, unless it's a string */
+ instr_words_needed += (*x == '"') ? strlen(x) : 1;
(*o_next)->operand = strdup(x);
+ if ((*o_next)->operand == NULL) {
+ fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
+ return -1;
+ }
(*o_next)->next = NULL;
o_next = &((*o_next)->next);
}
}
+ DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed);
+
+ /* extra room for assembled words */
+ instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
+ if (instr == NULL) {
+ fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
+ return -1;
+ }
+
+ instr->label = label ? strdup(label) : NULL;
+ instr->opcode = opcode ? strdup(opcode) : NULL;
+ instr->operands = operand_list;
+
*next_instr = instr;
return 0;
/* try to generate bytecode for an instruction */
static
-int instr_assemble_(struct dynamic_array_ *labels, struct instruction_ *i, unsigned int allow_short_labels) {
+int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
unsigned int nwu = 0; /* number of words used */
unsigned int incomplete = 0;
int bits;
/* special case DAT */
if (strncasecmp(i->opcode, "DAT", 3) == 0) {
- /* just dump operands into words, I guess */
- fprintf(stderr, "FIXME unhandled raw data\n");
- /* count total length of data.. */
- /* realloc instruction */
- /* populate words */
+ DEBUG_PRINTF("processing DAT...\n");
+
+ i->length = 0;
+
+ while (o) {
+ size_t j, dat_len;
+ char *x;
+ unsigned long l;
+
+ DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
+
+ /* is this a string? */
+ if ( (x = strchr("\"'`", o->operand[0])) ) {
+ dat_len = strlen(o->operand) - 1;
+ if (o->operand[dat_len] == *x) {
+ /* it is a string */
+ DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
+
+ for (j = 0, x = o->operand + 1;
+ j < dat_len - 1;
+ j++, x++) {
+ i->instr_words[i->length] = (DCPU16_WORD)*x;
+ i->length++;
+ }
+ }
+ o = o->next;
+ continue;
+ }
+
+ char *ep;
+ errno = 0;
+ l = strtoul(o->operand, &ep, 0);
+ if (errno == 0
+ && (*o->operand && (*ep == '\0')) ) {
+ /* conversion succeeded */
+ if (l > 0xffff) {
+ fprintf(stderr, "value '%lu' out of range\n", l);
+ return -1;
+ }
+ }
+
+ fprintf(stderr, "FIXME finish implementing DAT\n");
+ /* check if it's a parsable number */
+
+ /* otherwise assume it's a label */
+
+
+
+ o = o->next;
+ }
+
return 0;
}
return -1;
}
- bits = value_bits_(labels, o->operand, i->instr_words + nwu + 1, &nwu, allow_short_labels);
+ bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
if (bits == -1) {
fprintf(stderr, "couldn't assemble instruction\n");
return -1;
* break each line into parts, populate parts into structures
*/
static
-int parse_stream_(FILE *f, const char *src, struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
+int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
struct instruction_ *instr, **instr_list_entry;
unsigned int line = 0;
int retval = 0;
if (instr) {
/* add to list of instructions */
- instr_list_entry = dynarray_add_(instructionps, &instr);
+ instr_list_entry = dynarray_add(instructionps, &instr);
if (instr_list_entry == NULL) {
fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
break;
break;
}
- if (dynarray_add_(labels, &new_label) == NULL) {
+ if (dynarray_add(labels, &new_label) == NULL) {
fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
break;
}
* make a full pass over instruction list to resolve labels
*/
static
-int assemble_check_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
+int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
int retval = 0;
size_t x;
+ /* fixing short labels .... */
+ /* by here we have our list of instructions and their maximum instruction lengths */
+ /* and we have a list of addresses, based on those maximum lengths */
+ /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
+ /* and reassemble.. */
+ /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
+ /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
+
+ /* try this? keep another list of locations a label address is used */
+ /* as we step forward, and recompute an address, back up to first occurrence of address, make sure nothing else has changed */
+
DEBUG_PRINTF(" final pass of assembler...\n");
for (x = 0; x < instructionps->entries; x++) {
struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
}
static
-int output_(struct dynamic_array_ *instructionps, const char *filename) {
+int output_(struct dynamic_array *instructionps, const char *filename) {
FILE *of = NULL;
struct instruction_ **instrp;
size_t i, r, total_words = 0;
return 0;
}
-static struct dynamic_array_ *instructionps_;
-static struct dynamic_array_ *labels_;
+static struct dynamic_array *instructionps_;
+static struct dynamic_array *labels_;
int main(int argc, char *argv[]) {
const char *out_filename = NULL;
out_filename = out_filename_default_;
/* init tables */
- instructionps_ = dynarray_new_(sizeof (struct instruction_ *), 1024);
- labels_ = dynarray_new_(sizeof(struct label_), 256);
+ instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
+ labels_ = dynarray_new(sizeof(struct label_), 256);
if (instructionps_ == NULL
|| labels_ == NULL) {
fprintf(stderr, "failed to initialize\n");
--- /dev/null
+/* common.c
+ * Utility functions shared between modules, but not exported.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "common.h"
+#include "dcpu16.h"
+
+/* dynarray_init
+ * Initialize a caller-provided dynamic array struct and allocate room for
+ * its first 'grow_size' entries.
+ *
+ *  da          struct to initialize; prior contents are overwritten
+ *  entry_size  size in bytes of one entry, must be non-zero
+ *  grow_size   number of entries to allocate now, and to add each time the
+ *              array needs to grow, must be non-zero
+ *
+ * Returns 0 on success.  Returns -1 on failure, after printing a diagnostic
+ * to stderr; errno is set to EINVAL for a zero size and to ENOMEM when the
+ * requested byte count cannot be represented.  On failure da->a is NULL and
+ * da->allocated is 0.
+ */
+int dynarray_init(struct dynamic_array *da, size_t entry_size, size_t grow_size) {
+    assert(da);
+    assert(entry_size);
+    assert(grow_size);
+
+    /* checked again at runtime, because assert() compiles away under NDEBUG */
+    if (entry_size == 0 || grow_size == 0) {
+        fprintf(stderr, "%s: internal error: sizes cannot be zero\n", __func__);
+        errno = EINVAL;
+        return -1;
+    }
+
+    da->allocated = 0;
+    da->entries = 0;
+    da->entry_size = entry_size;
+    da->grow_size = grow_size;
+    da->a = NULL;
+
+    /* refuse a first chunk whose byte count would wrap around size_t,
+       as malloc would then quietly return a too-small buffer */
+    if (grow_size > (size_t)-1 / entry_size) {
+        fprintf(stderr, "%s: internal error: allocation size overflows\n", __func__);
+        errno = ENOMEM;
+        return -1;
+    }
+
+    da->a = malloc(da->entry_size * da->grow_size);
+    if (da->a == NULL) {
+        fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+        return -1;
+    }
+    da->allocated = da->grow_size;
+    return 0;
+}
+
+/* dynarray_new
+ * Allocate a dynamic array struct on the heap and initialize it with
+ * dynarray_init().
+ *
+ *  entry_size  size in bytes of one entry
+ *  grow_size   number of entries per allocation step
+ *
+ * Returns the new array, or NULL after printing a diagnostic to stderr if
+ * either the struct could not be allocated or dynarray_init() failed.
+ */
+struct dynamic_array *dynarray_new(size_t entry_size, size_t grow_size) {
+    struct dynamic_array *array;
+
+    assert(entry_size);
+    assert(grow_size);
+
+    array = calloc(1, sizeof *array);
+    if (!array) {
+        fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
+        return NULL;
+    }
+
+    if (dynarray_init(array, entry_size, grow_size) != 0) {
+        /* report before free(), so errno still describes the init failure */
+        fprintf(stderr, "%s():%s\n", "dynarray_init", strerror(errno));
+        free(array);
+        array = NULL;
+    }
+
+    return array;
+}
+
+/* dynarray_add
+ * Copy one entry (da->entry_size bytes read from 'item') onto the end of
+ * the array, growing the storage by da->grow_size entries when it is full.
+ *
+ * Returns a pointer to the stored copy inside the array.  Returns NULL,
+ * after printing a diagnostic to stderr, if the array could not be grown;
+ * the array is left unchanged in that case.
+ * A later call which grows the array may move the storage, so returned
+ * pointers should not be held across calls.
+ */
+void *dynarray_add(struct dynamic_array *da, void *item) {
+    void *dst;
+
+    assert(da);
+    assert(item);
+
+    /* make room, make room */
+    if (da->entries == da->allocated) {
+        size_t new_allocated = da->allocated + da->grow_size;
+        void *tmp_ptr;
+
+        /* refuse to grow if the entry count or the byte count would wrap
+           around size_t; realloc would otherwise be handed a too-small
+           size and the memcpy below would write out of bounds */
+        if (new_allocated < da->allocated
+        ||  (da->entry_size != 0 && new_allocated > (size_t)-1 / da->entry_size)) {
+            errno = ENOMEM;
+            fprintf(stderr, "%s():%s\n", "realloc", strerror(errno));
+            return NULL;
+        }
+
+        tmp_ptr = realloc(da->a, new_allocated * da->entry_size);
+        if (tmp_ptr == NULL) {
+            fprintf(stderr, "%s():%s\n", "realloc", strerror(errno));
+            return NULL;
+        }
+        da->a = tmp_ptr;
+        da->allocated = new_allocated;
+    }
+
+    dst = DYNARRAY_ITEM(*da, da->entries);
+    memcpy(dst, item, da->entry_size);
+
+    da->entries++;
+
+    return dst;
+}
+
+/* str_to_word
+ * Convert the whole of string 's' to a number with strtoul() (base 0, so
+ * the usual decimal, 0x hex, and leading-0 octal forms are accepted).
+ *
+ * Returns the value, or -1 if strtoul() reported an error, if 's' is empty,
+ * if anything is left over after the number, or if the value is not below
+ * DCPU16_RAM.  errno is set to ERANGE in the last case.
+ */
+int str_to_word(char *s) {
+    char *end;
+    unsigned long value;
+
+    assert(s);
+
+    errno = 0;
+    value = strtoul(s, &end, 0);
+
+    /* out of range of conversion, empty input, or invalid character encountered */
+    if (errno != 0 || *s == '\0' || *end != '\0')
+        return -1;
+
+    /* out of range for our needs */
+    if (value >= DCPU16_RAM) {
+        errno = ERANGE;
+        return -1;
+    }
+
+    return (int)value;
+}
+
+/* strqtok_r
+ * Tokenizer in the spirit of strtok_r(): splits on any character of 'sep',
+ * except that separators inside a quoted span, or directly preceded by
+ * 'esc', do not split.
+ *
+ *  str    string to start tokenizing, or NULL to continue from *lasts
+ *  sep    set of separator characters
+ *  esc    escape character; the character following it is taken literally
+ *  quote  set of characters which may open a quoted span; a span is closed
+ *         only by the same character which opened it
+ *  lastq  out: NULL, or the location within the returned token of an
+ *         opening quote which was never closed
+ *  lasts  in/out: scan position carried between calls
+ *
+ * The token is modified in place: the separator which ended it is
+ * overwritten with '\0', and escape characters are removed.  Quote
+ * characters are left in the token.
+ * Returns the next token, or NULL when only separators (or nothing) remain.
+ */
+char *strqtok_r(char *str, const char *sep, int esc, const char *quote, char **lastq, char **lasts) {
+    int escaped = 0;
+    int retry;
+    char *tok,
+         *lastq_ret = NULL,
+         *src,
+         *dst;
+
+    if (str)
+        *lasts = str;
+
+    /* *lastq doubles as the open-quote state of the scanner below, so it
+       must start clear on every call; otherwise an unmatched quote
+       reported by the previous call would make this call begin as though
+       it were already inside a quoted span */
+    *lastq = NULL;
+
+    /* next token starts after any leading seps */
+    *lasts += strspn(*lasts, sep);
+    tok = *lasts;
+    if (*tok == '\0')
+        return NULL;
+
+    do {
+        retry = 0;
+        while (**lasts) {
+            /* the previous character was the escape, do not consider this character any further */
+            if (escaped) {
+                escaped = 0;
+                (*lasts)++;
+                continue;
+            }
+
+            /* this character is the escape, do not consider the next character */
+            if (**lasts == esc) {
+                escaped = 1;
+                (*lasts)++;
+                continue;
+            }
+
+            /* we have a quote open, only consider matching quote to close */
+            if (*lastq) {
+                if (**lasts == **lastq)
+                    *lastq = NULL;
+                (*lasts)++;
+                continue;
+            }
+
+            /* this character is an opening quote, remember where it is */
+            if (strchr(quote, **lasts)) {
+                *lastq = *lasts;
+                (*lasts)++;
+                continue;
+            }
+
+            /* this character is a separator, separate and be done */
+            if (strchr(sep, **lasts)) {
+                **lasts = '\0';
+                (*lasts)++;
+                break;
+            }
+            (*lasts)++;
+        }
+
+        /* were we left with an unmatched quote?
+           remember where we had trouble, then scan everything following
+           the lonely quote again, treating it as an ordinary character */
+        if (*lastq) {
+            lastq_ret = *lastq;
+            *lasts = *lastq + 1;
+            *lastq = NULL;
+            /* the rescan starts right after the quote, so an escape seen at
+               the very end of the first pass must not carry over into it */
+            escaped = 0;
+            retry = 1;
+        }
+    } while (retry);
+
+    /* now strip escape characters, compacting the token in place */
+    for (src = dst = tok; *src; src++, dst++) {
+        if (*src == esc) {
+            src++;
+            /* a lone escape at the very end of the token is dropped */
+            if (*src == '\0')
+                break;
+        }
+        /* compaction may move the unmatched quote; keep pointing at it */
+        if (src == lastq_ret)
+            lastq_ret = dst;
+        *dst = *src;
+    }
+    *dst = *src;
+
+    /* report where we had trouble */
+    if (lastq_ret)
+        *lastq = lastq_ret;
+
+    return tok;
+}