*
* TODO
* needs ability to specify location for code or data
+ * needs ability to specify label as relative to another label
* short labels not correctly computed
*/
void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
size_t i;
+ /* idea: label1:label2 - calculated as offset between labels */
+
/* for each label.. */
for (i = 0; i < labels->entries; i++) {
struct label_ *l;
return r;
}
-/* buf_tokenize_
- * Parses the zero-terminated line of input 'buf' into a newly-allocated struct instruction_.
- * [label] opcode [operand[,operand[,...]]]
- * Does not yet validate if labels, opcodes, or operands are valid...
+/* tokenize_line_
+ * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
+ * [label] instruction [operand[,operand[,...]]]
+ * Does not yet validate the contents of any of these tokens.
*/
static
-int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
- const char const *sep = " \t\n";
- const char const *quot = "'`\"";
+int tokenize_line_(char *line, struct instruction_ **next_instr) {
+ const char const *whitespace = " \t\n";
+ const char const *quotes = "\"'`";
struct instruction_ *instr = NULL;
+ char *x, *st, *qt;
+ char *label, *opcode;
struct operand_ *operand_list = NULL;
- struct operand_ **o_next = &operand_list;
- char *label = NULL,
- *opcode = NULL;
- char *x,
- *y,
- *st, *qt;
- size_t instr_words_needed = 1;
+ struct operand_ **operand_tail = &operand_list;
+ size_t instr_words_needed = 0;
- assert(buf != NULL);
- assert(next_instr != NULL);
+ assert(line);
+ assert(next_instr);
*next_instr = NULL;
- /* kill leading whitespace */
- buf += strspn(buf, sep);
-
- /* locate first non-quoted ';', ignore anything following it */
- x = strqtok_r(buf, ";", '\\', quot, &qt, &st);
- if (x == NULL)
- return 0;
- if (qt) {
- fprintf(stderr, "unmatched %c-quote\n", *qt);
- return -1;
- }
- if (*buf == '\0')
+ /* strip leading whitespace */
+ line += strspn(line, whitespace);
+ if (*line == '\0')
return 0;
- /* kill trailing whitespace */
- for (x = buf + strlen(buf) - 1; *x && strchr(sep, *x); x--)
- *x = '\0';
- if (*buf == '\0')
+ /* set first bare ';' to '\0', thus isolating any comments */
+ /* here we only care about the side-effect of truncating the first separator character */
+ (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
+ /* we don't care if there was an unmatched quote at this point, let's see what happens */
+ if (*line == '\0')
return 0;
- DEBUG_PRINTF("trimmed buf: '%s'\n", buf);
-
- /* determine if first token is label, opcode, or we just have a blank line to ignore */
- x = strqtok_r(buf, sep, '\\', quot, &qt, &st);
+ /* carve off the first token, determine if it is a label */
+ x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
if (x == NULL || *x == '\0')
return 0;
if (qt) {
- fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
- return -1;
+ /* labels could contain an unmatched quote character, I guess? */
+ qt = NULL;
}
-/* I want c-style labels in my asm, but example in spec uses : in prefix rather than postfix */
+ /* we have something, try to make sense of what it is */
+
#ifdef NON_SPEC_LABELS
- /* labels end with :, otherwise its an opcode */
- y = x + strlen(x) - 1;
- if (*y == ':') {
- DEBUG_PRINTF("found label '%s'\n", y);
- *y = '\0';
+ /* I want my labels like 'label:' */
+ if ( *(x + strlen(line) - 1) == ':' ) {
+ *(x + strlen(line) - 1) = '\0';
+ DEBUG_PRINTF("label: %s\n", x);
+
label = x;
- opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
- if (qt) {
- fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
- return -1;
- }
+
+ opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
} else {
label = NULL;
opcode = x;
}
-#else /* NON_SPEC_LABELS */
- /* labels.. begin? with ':' ? okay, I guess. Whatever. */
- /* otherwise, it's an opcode */
+#endif /* NON_SPEC_LABELS */
+
+ /* spec gives example of labels as ':label' */
if (*x == ':') {
- DEBUG_PRINTF("found label '%s'\n", x);
- label = x + 1;
- opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
- if (qt) {
- fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
- return -1;
- }
+ *x = '\0';
+ x++;
+ label = x;
+ opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
} else {
label = NULL;
opcode = x;
}
-#endif /* NON_SPEC_LABELS */
+ /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
- if ( !label && (!opcode || !*opcode) ) {
- DEBUG_PRINTF("no label nor instruction?\n");
- return 0;
- }
+ if (opcode && *opcode) {
+ /* if we have an opcode, we'll need at least one word to compile instruction */
+ instr_words_needed++;
- DEBUG_PRINTF("label:'%s' opcode:'%s' operands:'%s'\n", label, opcode, st);
+ while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
+ struct operand_ *new_operand;
+ char *y;
- /*
- While normal instructions just have comma-separated operands,
- DAT can be followed by comma-separated list of:
- label, to be resolved to address
- value, like 0xffff
- string, "quoted", characters to be rendered into low-byte of words
- */
+ DEBUG_PRINTF("considering operand '%s'\n", x);
- while ( (x = strqtok_r(NULL, ",", '\\', quot, &qt, &st)) ) {
- DEBUG_PRINTF("\tx:'%s' qt:'%s' st:'%s'\n", x, qt, st);
+ /* trim leading whitespace */
+ x += strspn(x, whitespace);
- if (qt) {
- fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
- return -1;
- }
+ DEBUG_PRINTF("considering ftrim operand '%s'\n", x);
- /* trim trailing whitespace */
- y = x + strlen(x) - 1;
- while (strchr(sep, *y)) {
- *y = '\0';
- y--;
- }
+ if (*x) {
+ for (y = x + strlen(x) - 1; *y; y--) {
+ if (strchr(whitespace, *y)) {
+ *y = '\0';
+ }
+ }
+ }
+ /* nothing left? */
+ if (*x == '\0') {
+ fprintf(stderr, "ignoring null operand in line %zu\n", instr->src_line);
+ continue;
+ }
- /* new operand to append to list */
- *o_next = malloc(sizeof **o_next);
- if (*o_next == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
- return -1;
- }
+ DEBUG_PRINTF("found operand '%s'\n", x);
- /* assume an operand takes up one word, unless it's a string */
- /* if it's a string, it comes with quotes, which will get stripped, but will include trailing zero */
- instr_words_needed += (*x == '"') ? strlen(x) - 1 : 1;
+ new_operand = malloc(sizeof *new_operand);
+ if (new_operand == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
- (*o_next)->operand = strdup(x);
- if ((*o_next)->operand == NULL) {
- fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
- return -1;
+ new_operand->operand = strdup(x);
+ if (new_operand->operand == NULL) {
+ fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
+ return -1;
+ }
+
+ new_operand->next = NULL;
+
+ if (strchr(quotes, x[0])) {
+ /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
+ instr_words_needed += strlen(x) - 1;
+ }
+ instr_words_needed++;
+
+ *operand_tail = new_operand;
+ operand_tail = &(*operand_tail)->next;
}
- (*o_next)->next = NULL;
- o_next = &((*o_next)->next);
}
- DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed);
+ DEBUG_PRINTF("allocating instruction with room for %zu bytes\n", instr_words_needed);
- /* extra room for assembled words */
instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
if (instr == NULL) {
- fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
return -1;
}
- instr->label = label ? strdup(label) : NULL;
- instr->opcode = opcode ? strdup(opcode) : NULL;
+ if (label) {
+ instr->label = strdup(label);
+ if (instr->label == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
+ } else {
+ label = NULL;
+ }
+
+ if (opcode) {
+ instr->opcode = strdup(opcode);
+ if (instr->opcode == NULL) {
+ fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
+ return -1;
+ }
+ } else {
+ opcode = NULL;
+ }
+
instr->operands = operand_list;
*next_instr = instr;
break;
}
- if (buf_tokenize_(buf, &instr)) {
+ if (tokenize_line_(buf, &instr)) {
fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
retval = -1;
break;