From 8e7b08b2dd8bb2d45aeec0034e55aff729b1f12a Mon Sep 17 00:00:00 2001 From: Justin Wind Date: Tue, 17 Apr 2012 12:45:41 -0700 Subject: [PATCH] fixing sloppy errors introduced with new strqtok prior commit broke things, this one works better --- as-dcpu16.c | 167 ++++++++++++++++++++++++++++++++-------------------- common.c | 6 +- 2 files changed, 109 insertions(+), 64 deletions(-) diff --git a/as-dcpu16.c b/as-dcpu16.c index 4ed6050..22a05a1 100644 --- a/as-dcpu16.c +++ b/as-dcpu16.c @@ -77,6 +77,7 @@ struct operand_ { /* keep an array of instructions as we read them in */ struct instruction_ { + size_t src_line; char *label; /* set if a label points here */ char *opcode; /* tokenized instruction text */ struct operand_ *operands; /* list of operands */ @@ -525,9 +526,9 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { const char const *quot = "'`\""; struct instruction_ *instr = NULL; struct operand_ *operand_list = NULL; + struct operand_ **o_next = &operand_list; char *label = NULL, - *opcode = NULL, - *operand = NULL; + *opcode = NULL; char *x, *y, *st, *qt; @@ -539,13 +540,9 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { *next_instr = NULL; /* kill leading whitespace */ - buf += strspn(buf, " \t\n"); + buf += strspn(buf, sep); - /* kill trailing whitespace */ - for (x = buf + strlen(buf); *x && strchr(sep, *x); x--) - *x = '\0'; - - /* split on first non-quoted ';', ignore following comment */ + /* locate first non-quoted ';', ignore anything following it */ x = strqtok_r(buf, ";", '\\', quot, &qt, &st); if (x == NULL) return 0; @@ -553,13 +550,23 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { fprintf(stderr, "unmatched %c-quote\n", *qt); return -1; } + if (*buf == '\0') + return 0; + + /* kill trailing whitespace */ + for (x = buf + strlen(buf) - 1; *x && strchr(sep, *x); x--) + *x = '\0'; + if (*buf == '\0') + return 0; + + DEBUG_PRINTF("trimmed buf: '%s'\n", buf); /* determine if first token is label, opcode, or we just have a blank line to ignore */ - x = strqtok_r(x, sep, '\\', quot, &qt, &st); - if (x == NULL) + x = strqtok_r(buf, sep, '\\', quot, &qt, &st); + if (x == NULL || *x == '\0') return 0; if (qt) { - fprintf(stderr, "unmatched %c-quote\n", *qt); + fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt); return -1; } @@ -568,9 +575,14 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { /* labels end with :, otherwise its an opcode */ y = x + strlen(x) - 1; if (*y == ':') { + DEBUG_PRINTF("found label '%s'\n", y); *y = '\0'; label = x; opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st); + if (qt) { + fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt); + return -1; + } } else { label = NULL; opcode = x; @@ -579,18 +591,26 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { /* labels.. begin? with ':' ? okay, I guess. Whatever. */ /* otherwise, it's an opcode */ if (*x == ':') { + DEBUG_PRINTF("found label '%s'\n", x); label = x + 1; opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st); + if (qt) { + fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt); + return -1; + } } else { label = NULL; opcode = x; } #endif /* NON_SPEC_LABELS */ - if (opcode) { - operand = st; + if ( !label && (!opcode || !*opcode) ) { + DEBUG_PRINTF("no label nor instruction?\n"); + return 0; } + DEBUG_PRINTF("label:'%s' opcode:'%s' operands:'%s'\n", label, opcode, st); + /* While normal instructions just have comma-separated operands, DAT can be followed by comma-separated list of: @@ -599,45 +619,39 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { string, "quoted", characters to be rendered into low-byte of words */ - if (operand) { - struct operand_ **o_next = &operand_list; - - for (x = strqtok_r(operand, ",", '\\', quot, &qt, &st); - x; - x = strqtok_r(NULL, ",", '\\', quot, &qt, &st) ) { + while ( (x = strqtok_r(NULL, ",", '\\', quot, &qt, &st)) ) { + DEBUG_PRINTF("\tx:'%s' qt:'%s' st:'%s'\n", x, qt, st); - /* trim leading whitespace */ - x += strspn(x, " \t\n"); - if (*x == '\0') { - fprintf(stderr, "encountered empty operand\n"); - return -1; - } + if (qt) { + fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt); + return -1; + } - /* trim trailing whitespace */ - y = x + strlen(x) - 1; - while (strchr(" \t\n", *y)) { - *y = '\0'; - y--; - } + /* trim trailing whitespace */ + y = x + strlen(x) - 1; + while (strchr(sep, *y)) { + *y = '\0'; + y--; + } - /* new operand to append to list */ - *o_next = malloc(sizeof **o_next); - if (*o_next == NULL) { - fprintf(stderr, "%s():%s\n", "calloc", strerror(errno)); - return -1; - } + /* new operand to append to list */ + *o_next = malloc(sizeof **o_next); + if (*o_next == NULL) { + fprintf(stderr, "%s():%s\n", "calloc", strerror(errno)); + return -1; + } - /* assume an operand uses one word, unless it's a string */ - instr_words_needed += (*x == '"') ? strlen(x) : 1; + /* assume an operand takes up one word, unless it's a string */ + /* if it's a string, it comes with quotes, which will get stripped, but will include trailing zero */ + instr_words_needed += (*x == '"') ? strlen(x) - 1 : 1; - (*o_next)->operand = strdup(x); - if ((*o_next)->operand == NULL) { - fprintf(stderr, "%s():%s\n", "strdup", strerror(errno)); - return -1; - } - (*o_next)->next = NULL; - o_next = &((*o_next)->next); + (*o_next)->operand = strdup(x); + if ((*o_next)->operand == NULL) { + fprintf(stderr, "%s():%s\n", "strdup", strerror(errno)); + return -1; } + (*o_next)->next = NULL; + o_next = &((*o_next)->next); } DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed); @@ -659,6 +673,7 @@ int buf_tokenize_(char *buf, struct instruction_ **next_instr) { } /* try to generate bytecode for an instruction */ +/* returns -1 on unrecoverable error */ static int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) { unsigned int nwu = 0; /* number of words used */ @@ -669,7 +684,7 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign if (opt_.verbose > 2) { printf("%s: assembling %p ", __func__, i); instruction_print_(i, 1); - printf("\n"); + printf("(line :%zu)\n", i->src_line); } if (i->ready) { @@ -683,7 +698,7 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign i->length = 0; - while (o) { + for ( /* */ ; o; o = o->next) { size_t j, dat_len; char *x; unsigned long l; @@ -700,14 +715,16 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign for (j = 0, x = o->operand + 1; j < dat_len - 1; j++, x++) { - i->instr_words[i->length] = (DCPU16_WORD)*x; + i->instr_words[i->length] = *x; i->length++; } + /* Note that strings in DAT do not include their zero-terminators */ + /* specify as 'DAT "string", 0' */ } - o = o->next; continue; } + /* is this a number? */ char *ep; errno = 0; l = strtoul(o->operand, &ep, 0); @@ -718,20 +735,27 @@ int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsign fprintf(stderr, "value '%lu' out of range\n", l); return -1; } + i->instr_words[i->length] = l; + i->length++; + continue; } - fprintf(stderr, "FIXME finish implementing DAT\n"); - /* check if it's a parsable number */ - - /* otherwise assume it's a label */ - - + /* otherwise assume it's a label, even if we don't know what it is */ + if (label_addr_(labels, o->operand, &i->instr_words[i->length])) { + DEBUG_PRINTF("(deferred label resolution)\n"); + incomplete = 1; + } + i->length++; + } - o = o->next; + if (incomplete) { + DEBUG_PRINTF("pending label address\n"); + } else { + i->ready = 1; } return 0; - } + } /* end of DAT */ /* start with opcode bits */ bits = opcode_bits_(i->opcode); @@ -841,6 +865,7 @@ int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, } if (instr) { + instr->src_line = line; /* add to list of instructions */ instr_list_entry = dynarray_add(instructionps, &instr); if (instr_list_entry == NULL) { @@ -867,7 +892,10 @@ int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, label_addr_calculate_(instructionps, labels); } - instr_assemble_(labels, instr, allow_short_labels); + if (instr_assemble_(labels, instr, allow_short_labels)) { + fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n"); + break; + } } } if (ferror(f)) { @@ -904,9 +932,14 @@ int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *l DEBUG_PRINTF(" final pass of assembler...\n"); for (x = 0; x < instructionps->entries; x++) { struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x); - retval |= instr_assemble_(labels, *instrp, allow_short_labels); + retval = instr_assemble_(labels, *instrp, allow_short_labels); if (retval) { fprintf(stderr, "instruction %zu failed to assemble\n", x); + return retval; + } + if (! (*instrp)->ready) { + fprintf(stderr, "instruction not resolvable\n"); + return -1; } } @@ -933,6 +966,9 @@ int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *l return retval; } +/* output_ + * write assembled words to named file + */ static int output_(struct dynamic_array *instructionps, const char *filename) { FILE *of = NULL; @@ -1048,13 +1084,18 @@ int main(int argc, char *argv[]) { } VERBOSE_PRINTF("assembling '%s'...\n", filename); - parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); - + c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels); + if (c) + break; fclose(f); } } else { VERBOSE_PRINTF("assembling '%s'...\n", "stdin"); - parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); + c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels); + } + if (c) { + fprintf(stderr, "could not parse input, aborting\n"); + exit(EX_DATAERR); } if (assemble_check_(instructionps_, labels_, allow_short_labels)) { diff --git a/common.c b/common.c index 9dbadf3..ce1b6c0 100644 --- a/common.c +++ b/common.c @@ -126,7 +126,11 @@ char *strqtok_r(char *str, const char *sep, int esc, const char *quote, char **l } /* next token starts after any leading seps */ - *lasts += strspn(*lasts, sep); + while (**lasts && strchr(sep, **lasts)) { + **lasts = '\0'; + (*lasts)++; + } + tok = *lasts; if (*tok == '\0') return NULL; -- 2.45.2