X-Git-Url: http://git.squeep.com/?a=blobdiff_plain;f=as-dcpu16.c;h=2d131d7a40a167c37ee5115543aba21f9f73e8f6;hb=46ffc28b939d84716b616fdd048ca0c90f374fdd;hp=4ed6050adf2ea078f219a709cae5ddfda8127c80;hpb=4706199a81dc631b6969927e1a6ad27591852b20;p=dcpu16 diff --git a/as-dcpu16.c b/as-dcpu16.c index 4ed6050..2d131d7 100644 --- a/as-dcpu16.c +++ b/as-dcpu16.c @@ -19,6 +19,7 @@ * * TODO * needs ability to specify location for code or data + * needs ability to specify label as relative to another label * short labels not correctly computed */ @@ -77,6 +78,7 @@ struct operand_ { /* keep an array of instructions as we read them in */ struct instruction_ { + size_t src_line; char *label; /* set if a label points here */ char *opcode; /* tokenized instruction text */ struct operand_ *operands; /* list of operands */ @@ -127,6 +129,8 @@ static void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) { size_t i; + /* idea: label1:label2 - calculated as offset between labels */ + /* for each label.. */ for (i = 0; i < labels->entries; i++) { struct label_ *l; @@ -514,143 +518,160 @@ int instruction_print_(struct instruction_ *i, unsigned int with_label) { return r; } -/* buf_tokenize_ - * Parses the zero-terminated line of input 'buf' into a newly-allocated struct instruction_. - * [label] opcode [operand[,operand[,...]]] - * Does not yet validate if labels, opcodes, or operands are valid... +/* tokenize_line_ + * Parses a zero-terminated line of input into a newly-allocated struct instruction_. + * [label] instruction [operand[,operand[,...]]] + * Does no validation of contents of any of these tokens, as of yet. */ static -int buf_tokenize_(char *buf, struct instruction_ **next_instr) { - const char const *sep = " \t\n"; - const char const *quot = "'`\""; +int tokenize_line_(char *line, struct instruction_ **next_instr) { + const char const *whitespace = " \t\n"; + const char const *quotes = "\"'`"; struct instruction_ *instr = NULL; + char *x, *st, *qt; + char *label, *opcode; struct operand_ *operand_list = NULL; - char *label = NULL, - *opcode = NULL, - *operand = NULL; - char *x, - *y, - *st, *qt; - size_t instr_words_needed = 1; + struct operand_ **operand_tail = &operand_list; + size_t instr_words_needed = 0; - assert(buf != NULL); - assert(next_instr != NULL); + assert(line); + assert(next_instr); *next_instr = NULL; - /* kill leading whitespace */ - buf += strspn(buf, " \t\n"); - - /* kill trailing whitespace */ - for (x = buf + strlen(buf); *x && strchr(sep, *x); x--) - *x = '\0'; + /* strip leading whitespace */ + line += strspn(line, whitespace); + if (*line == '\0') + return 0; - /* split on first non-quoted ';', ignore following comment */ - x = strqtok_r(buf, ";", '\\', quot, &qt, &st); - if (x == NULL) + /* set first bare ';' to '\0', thus isolating any comments */ + /* here we only care about the side-effect of truncating the first separator character */ + (void)strqtok_r(line, ";", '\\', quotes, &qt, &st); + /* we don't care if there was an unmatched quote at this point, let's see what happens */ + if (*line == '\0') return 0; - if (qt) { - fprintf(stderr, "unmatched %c-quote\n", *qt); - return -1; - } - /* determine if first token is label, opcode, or we just have a blank line to ignore */ - x = strqtok_r(x, sep, '\\', quot, &qt, &st); - if (x == NULL) + /* carve off the first token, determine if it is a label */ + x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st); + if (x == NULL || *x == '\0') return 0; if (qt) { - fprintf(stderr, "unmatched %c-quote\n", *qt); - return -1; + /* labels could contain an unmatched quote character, I guess? */ + qt = NULL; } -/* I want c-style labels in my asm, but example in spec uses : in prefix rather than postfix */ + /* we have something, try to make sense of what it is */ + #ifdef NON_SPEC_LABELS - /* labels end with :, otherwise its an opcode */ - y = x + strlen(x) - 1; - if (*y == ':') { - *y = '\0'; + /* I want my labels like 'label:' */ + if ( *(x + strlen(line) - 1) == ':' ) { + *(x + strlen(line) - 1) = '\0'; + DEBUG_PRINTF("label: %s\n", x); + label = x; - opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st); + + opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); } else { label = NULL; opcode = x; } -#else /* NON_SPEC_LABELS */ - /* labels.. begin? with ':' ? okay, I guess. Whatever. */ - /* otherwise, it's an opcode */ +#endif /* NON_SPEC_LABELS */ + + /* spec gives example of labels as ':label' */ if (*x == ':') { - label = x + 1; - opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st); + *x = '\0'; + x++; + label = x; + opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st); } else { label = NULL; opcode = x; } -#endif /* NON_SPEC_LABELS */ + /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */ - if (opcode) { - operand = st; - } + if (opcode && *opcode) { + /* if we have an opcode, we'll need at least one word to compile instruction */ + instr_words_needed++; - /* - While normal instructions just have comma-separated operands, - DAT can be followed by comma-separated list of: - label, to be resolved to address - value, like 0xffff - string, "quoted", characters to be rendered into low-byte of words - */ + while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) { + struct operand_ *new_operand; + char *y; + + DEBUG_PRINTF("considering operand '%s'\n", x); - if (operand) { - struct operand_ **o_next = &operand_list; + /* trim whitespaces */ + x += strspn(x, whitespace); - for (x = strqtok_r(operand, ",", '\\', quot, &qt, &st); - x; - x = strqtok_r(NULL, ",", '\\', quot, &qt, &st) ) { + DEBUG_PRINTF("considering ftrim operand '%s'\n", x); - /* trim leading whitespace */ - x += strspn(x, " \t\n"); + if (*x) { + for (y = x + strlen(x) - 1; *y; y--) { + if (strchr(whitespace, *y)) { + *y = '\0'; + } + } + } + /* nothing left? */ if (*x == '\0') { - fprintf(stderr, "encountered empty operand\n"); - return -1; + fprintf(stderr, "ignoring null operand in line %zu\n", instr->src_line); + continue; } - /* trim trailing whitespace */ - y = x + strlen(x) - 1; - while (strchr(" \t\n", *y)) { - *y = '\0'; - y--; - } + DEBUG_PRINTF("found operand '%s'\n", x); - /* new operand to append to list */ - *o_next = malloc(sizeof **o_next); - if (*o_next == NULL) { - fprintf(stderr, "%s():%s\n", "calloc", strerror(errno)); + new_operand = malloc(sizeof *new_operand); + if (new_operand == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } - /* assume an operand uses one word, unless it's a string */ - instr_words_needed += (*x == '"') ? strlen(x) : 1; - - (*o_next)->operand = strdup(x); - if ((*o_next)->operand == NULL) { + new_operand->operand = strdup(x); + if (new_operand->operand == NULL) { fprintf(stderr, "%s():%s\n", "strdup", strerror(errno)); return -1; } - (*o_next)->next = NULL; - o_next = &((*o_next)->next); + + new_operand->next = NULL; + + if (strchr(quotes, x[0])) { + /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */ + instr_words_needed += strlen(x) - 1; + } + instr_words_needed++; + + *operand_tail = new_operand; + operand_tail = &(*operand_tail)->next; } } - DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed); + DEBUG_PRINTF("allocating instruction with room for %zu bytes\n", instr_words_needed); - /* extra room for assembled words */ instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr); if (instr == NULL) { - fprintf(stderr, "%s():%s\n", "calloc", strerror(errno)); + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); return -1; } - instr->label = label ? strdup(label) : NULL; - instr->opcode = opcode ? strdup(opcode) : NULL; + if (label) { + instr->label = strdup(label); + if (instr->label == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); + return -1; + } + } else { + label = NULL; + } + + if (opcode) { + instr->opcode = strdup(opcode); + if (instr->opcode == NULL) { + fprintf(stderr, "%s():%s\n", "malloc", strerror(errno)); + return -1; + } + } else { + opcode = NULL; + } + instr->operands = operand_list; *next_instr = instr; @@ -659,6 +680,7 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { } /* try to generate bytecode for an instruction */ +/* returns -1 on unrecoverable error */ static int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) { unsigned int nwu = 0; /* number of words used */ @@ -669,7 +691,7 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign if (opt_.verbose > 2) { printf("%s: assembling %p ", __func__, i); instruction_print_(i, 1); - printf("\n"); + printf("(line :%zu)\n", i->src_line); } if (i->ready) { @@ -683,7 +705,7 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign i->length = 0; - while (o) { + for ( /* */ ; o; o = o->next) { size_t j, dat_len; char *x; unsigned long l; @@ -700,14 +722,16 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign for (j = 0, x = o->operand + 1; j < dat_len - 1; j++, x++) { - i->instr_words[i->length] = (DCPU16_WORD)*x; + i->instr_words[i->length] = *x; i->length++; } + /* Note that strings in DAT do not include their zero-terminators */ + /* specify as 'DAT "string", 0' */ } - o = o->next; continue; } + /* is this a number? */ char *ep; errno = 0; l = strtoul(o->operand, &ep, 0); @@ -718,20 +742,27 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign fprintf(stderr, "value '%lu' out of range\n", l); return -1; } + i->instr_words[i->length] = l; + i->length++; + continue; } - fprintf(stderr, "FIXME finish implementing DAT\n"); - /* check if it's a parsable number */ - - /* otherwise assume it's a label */ - - + /* otherwise assume it's a label, even if we don't know what it is */ + if (label_addr_(labels, o->operand, &i->instr_words[i->length])) { + DEBUG_PRINTF("(deferred label resolution)\n"); + incomplete = 1; + } + i->length++; + } - o = o->next; + if (incomplete) { + DEBUG_PRINTF("pending label address\n"); + } else { + i->ready = 1; } return 0; - } + } /* end of DAT */ /* start with opcode bits */ bits = opcode_bits_(i->opcode); @@ -834,13 +865,14 @@ int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, break; } - if (buf_tokenize_(buf, &instr)) { + if (tokenize_line_(buf, &instr)) { fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n"); retval = -1; break; } if (instr) { + instr->src_line = line; /* add to list of instructions */ instr_list_entry = dynarray_add(instructionps, &instr); if (instr_list_entry == NULL) { @@ -867,7 +899,10 @@ int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, label_addr_calculate_(instructionps, labels); } - instr_assemble_(labels, instr, allow_short_labels); + if (instr_assemble_(labels, instr, allow_short_labels)) { + fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n"); + break; + } } } if (ferror(f)) { @@ -904,9 +939,14 @@ int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *l DEBUG_PRINTF(" final pass of assembler...\n"); for (x = 0; x < instructionps->entries; x++) { struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x); - retval |= instr_assemble_(labels, *instrp, allow_short_labels); + retval = instr_assemble_(labels, *instrp, allow_short_labels); if (retval) { fprintf(stderr, "instruction %zu failed to assemble\n", x); + return retval; + } + if (! (*instrp)->ready) { + fprintf(stderr, "instruction not resolvable\n"); + return -1; } } @@ -933,6 +973,9 @@ int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *l return retval; } +/* output_ + * write assembled words to named file + */ static int output_(struct dynamic_array *instructionps, const char *filename) { FILE *of = NULL; @@ -1048,13 +1091,18 @@ int main(int argc, char *argv[]) { } VERBOSE_PRINTF("assembling '%s'...\n", filename); - parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); - + c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); + if (c) + break; fclose(f); } } else { VERBOSE_PRINTF("assembling '%s'...\n", "stdin"); - parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); + c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); + } + if (c) { + fprintf(stderr, "could not parse input, aborting\n"); + exit(EX_DATAERR); } if (assemble_check_(instructionps_, labels_, allow_short_labels)) {