redid line tokenizer
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sysexits.h>
7 #include <assert.h>
8
9 #include "dcpu16.h"
10 #include "common.h"
11
12 /*
13 * quick and dirty assembler for dcpu16
14 *
15 * Justin Wind <justin.wind@gmail.com>
16 * 2012 04 07 - implementation started
17 * 2012 04 10 - functional
18 * 2012 04 16 - support dat statements
19 *
20 * TODO
21 * needs ability to specify location for code or data
22 * needs ability to specify label as relative to another label
23 * short labels not correctly computed
24 */
25
/* revision-control identification string; printed by the full usage screen */
static const char * const src_id_ = "$Id$";

/* output file name used when no -o option is given */
const char out_filename_default_[] = "a.out";
29
/*
 * Options collected from the command line.
 *   verbose - incremented once per -v; any non-zero value enables
 *             VERBOSE_PRINTF output, values above 2 enable DEBUG output
 *   dryrun  - incremented once per -d; when set, no output file is written
 */
struct options {
    unsigned int verbose;
    unsigned int dryrun;
};

/* the single, global set of invocation options; everything starts disabled */
struct options opt_ = { .verbose = 0, .dryrun = 0 };
38
/*
 * Diagnostic output helpers, all gated on opt_.verbose.
 * Arguments are not evaluated when the message is suppressed.
 */

/* debug message prefixed with "DEBUG: "; shown when verbosity exceeds 2 */
#define DEBUG_PRINTF(...) \
    do { \
        if (opt_.verbose > 2) { \
            printf("DEBUG: "); \
            printf(__VA_ARGS__); \
        } \
    } while (0)

/* debug message without prefix, for continuing a line started by DEBUG_PRINTF */
#define DEBUG_PRINTFQ(...) \
    do { \
        if (opt_.verbose > 2) \
            printf(__VA_ARGS__); \
    } while (0)

/* informational message; shown at any non-zero verbosity */
#define VERBOSE_PRINTF(...) \
    do { \
        if (opt_.verbose) \
            printf(__VA_ARGS__); \
    } while (0)
42
43 static
44 void usage_(char *prog, unsigned int full) {
45 FILE *f = full ? stdout : stderr;
46 char *x = strrchr(prog, '/');
47
48 if (x && *(x + 1))
49 prog = x + 1;
50
51 if (full)
52 fprintf(f, "%s -- \n\n",
53 prog);
54
55 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
56 prog);
57
58 if (full) {
59 fprintf(f, "\nOptions:\n"
60 "\t-h -- this screen\n"
61 "\t-o <file> -- output to <file> [default: %s]\n"
62 "\t-s -- allow short labels in instruction words\n"
63 "\t-d -- dry run, print results, do not write to file\n"
64 "\t-v -- verbose output\n",
65 out_filename_default_);
66
67 fprintf(f, "\n%78s\n",
68 src_id_);
69 }
70 }
71
72
/*
 * One operand of an instruction.
 * Operands are chained in source order as a singly-linked list.
 */
struct operand_ {
    struct operand_ *next;  /* following operand, or NULL for the last one */
    char *operand;          /* tokenized operand text */
};
78
79 /* keep an array of instructions as we read them in */
80 struct instruction_ {
81 size_t src_line;
82 char *label; /* set if a label points here */
83 char *opcode; /* tokenized instruction text */
84 struct operand_ *operands; /* list of operands */
85 unsigned int ready : 1; /* bytecode computed? */
86 unsigned int length; /* number of words of bytecode */
87 DCPU16_WORD instr_words[];
88 };
89
90 /* keep an array of labels, indexed back to their instruction locations */
91 struct label_ {
92 char *label; /* name of label */
93 struct instruction_ **instr; /* pointer into array of instructions */
94 unsigned int ready : 1; /* do we know where this label is yet? */
95 DCPU16_WORD addr;
96 };
97
98
99 /* locate and return the label entry matching name */
100 static
101 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
102 size_t x;
103
104 for (x = 0; x < labels->entries; x++) {
105 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
106 if (strcmp(l->label, name) == 0)
107 return l;
108 }
109 return NULL;
110 }
111
112
113 /* if a label has a validly-calculated address, fetch it */
114 static
115 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
116 struct label_ *l;
117
118 if ( (l = label_find_(labels, name)) == NULL )
119 return -1;
120 if (! l->ready)
121 return -2;
122 *addr = l->addr;
123 return 0;
124 }
125
126
127 /* attempt to determine the addresses of all labels */
128 static
129 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
130 size_t i;
131
132 /* idea: label1:label2 - calculated as offset between labels */
133
134 /* for each label.. */
135 for (i = 0; i < labels->entries; i++) {
136 struct label_ *l;
137 struct instruction_ **instr;
138 unsigned int word_count = 0;
139
140 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
141
142 /* if it's already calculated, great. */
143 if (l->ready)
144 continue;
145
146 /*
147 * starting at the instruction for this label,
148 * walk backwards through the list of instructions
149 * until we get to the start or a known prior label address.
150 * update our label with the freshly calculated addr
151 */
152 for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
153 instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
154 instr--) {
155
156 if ((*instr)->ready)
157 DEBUG_PRINTF("%s: instr not ready\n", __func__);
158 word_count += (*instr)->length;
159
160 /* have we come across an instruction which a label points to?
161 it should already be calculated, so just add that on and be done */
162 if ((*instr)->label
163 && strcmp((*instr)->label, l->label)) {
164 DCPU16_WORD addr;
165
166 if (label_addr_(labels, (*instr)->label, &addr)) {
167 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
168 (*instr)->label,
169 l->label);
170 continue;
171 }
172
173 word_count += addr;
174 break;
175 }
176 }
177 l->addr = word_count;
178 l->ready = 1;
179 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
180 }
181 }
182
183
/* opcode_bits_
 * Look up the low nibble encoding a basic opcode mnemonic (case-insensitive).
 * returns the nibble value, with 0x00 standing for the non-basic group
 * returns -1, after reporting on stderr, when the mnemonic is unknown
 */
static
int opcode_bits_(char *opcode) {
    static struct {
        char op[4];
        char value;
    } mnemonics[] = {
        { "JSR", 0x00 },
        /* { "future nbi instruction", 0x00 }, */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
        { "", 0x00 }    /* end-of-table sentinel */
    };
    size_t idx;

    for (idx = 0; mnemonics[idx].op[0] != '\0'; idx++) {
        if (!strcasecmp(mnemonics[idx].op, opcode))
            return mnemonics[idx].value;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
223
/* nbi_opcode_bits_
 * Look up the six bits encoding a non-basic opcode mnemonic (case-insensitive).
 * These bits take the place of the first operand when the basic opcode is 0x00.
 * returns the bit value
 * returns -1, after reporting the offending mnemonic on stderr, when unknown
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 }    /* end-of-table sentinel */
    }, *o;

    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            return o->value;
    }

    /* report what the caller asked for, not the (empty) sentinel entry */
    fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
    return -1;
}
248
/* register_enumerate_
 * Convert a register character like 'x' to its value like 0x03.
 * Upper and lower case are equivalent.
 * returns the register number 0..7 (A B C X Y Z I J)
 * returns (unsigned int)-1, after reporting on stderr, for any other
 * character, including the zero terminator
 */
static inline
unsigned int register_enumerate_(char r) {
    static const char regs[] = "AaBbCcXxYyZzIiJj";
    /* strchr would happily match the terminator of regs, so rule that out first */
    const char *hit = (r != '\0') ? strchr(regs, r) : NULL;

    if (hit)
        return (unsigned int)(hit - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned char)r);
    return -1;
}
261
/* buf_strip_chars_
 * Remove, in place, every occurrence in buf of any character listed in chars.
 * The remaining characters keep their order and buf stays zero-terminated.
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    const char *src;
    char *dst = buf;

    for (src = buf; *src != '\0'; src++) {
        /* copy down only the characters we are keeping */
        if (strchr(chars, *src) == NULL)
            *dst++ = *src;
    }
    *dst = '\0';
}
277
278
279 /* value_bits_
280 * generate the six bits for a given operand string
281 * returns -1 if it could not parse the operand
282 * returns -2 if it could not parse the operand due to an unresolved label
283 * notes: nextword may be overwritten even if it's not used in final instruction
284 */
285 static
286 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
287 static char *operand = NULL;
288 static size_t operand_sz = 0;
289
290 unsigned long l;
291 char *o, *ep;
292
293 /*
294 Our operand working buffer shouldn't ever need to be too big,
295 but DAT might blow that assumption.
296 */
297 if (operand_sz <= strlen(operand_orig)) {
298 void *tmp_ptr;
299 size_t new_sz = strlen(operand_orig);
300
301 if (new_sz < 256)
302 new_sz = 256;
303 new_sz += 256;
304
305 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
306 tmp_ptr = realloc(operand, new_sz);
307 if (tmp_ptr == NULL) {
308 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
309 return -1;
310 }
311 operand = tmp_ptr;
312 operand_sz = new_sz;
313 }
314
315 o = strcpy(operand, operand_orig);
316
317 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
318
319 /* this is a very stupid parser */
320
321 /* first, let's trim all whitespace out of string at once to make parsing easier */
322 buf_strip_chars_(operand, " \t\n");
323
324 /* single character might match a register */
325 if (strlen(operand) == 1
326 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
327 DEBUG_PRINTFQ("is register %c\n", *operand);
328 return register_enumerate_(*operand);
329 }
330
331 /* easy matches */
332 if (strcasecmp(operand, "POP") == 0) {
333 DEBUG_PRINTFQ("is POP\n");
334 return 0x18;
335 }
336 if (strcasecmp(operand, "PUSH") == 0) {
337 DEBUG_PRINTFQ("is PUSH\n");
338 return 0x19;
339 }
340 if (strcasecmp(operand, "PEEK") == 0) {
341 DEBUG_PRINTFQ("is PEEK\n");
342 return 0x1a;
343 }
344 if (strcasecmp(operand, "SP") == 0) {
345 DEBUG_PRINTFQ("is register SP\n");
346 return 0x1b;
347 }
348 if (strcasecmp(operand, "PC") == 0) {
349 DEBUG_PRINTFQ("is register PC\n");
350 return 0x1c;
351 }
352 if (strcasecmp(operand, "O") == 0) {
353 DEBUG_PRINTFQ("is register O\n");
354 return 0x1d;
355 }
356
357 /* is the operand [bracketed]? */
358 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
359 /* eat the brackets */
360 operand[strlen(operand) - 1] = '\0';
361 operand++;
362
363 /* is it [register]? */
364 if (strlen(operand) == 1
365 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
366 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
367 return 0x08 | register_enumerate_(*operand);
368 }
369
370 /* is it [register+something]? */
371 if ( (ep = strchr(operand, '+')) ) {
372 char *reg;
373 char *constant;
374
375 /* eat the plus */
376 *ep = '\0';
377 ep++;
378
379 /* figure out which one is which */
380 if (strlen(ep) == 1
381 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
382 reg = ep;
383 constant = operand;
384 } else if (strlen(operand) == 1
385 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
386 reg = operand;
387 constant = ep;
388 } else {
389 DEBUG_PRINTFQ("is unparsable\n");
390 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
391 return -1;
392 }
393
394 /* check if something is understandable as a value */
395 errno = 0;
396 l = strtoul(constant, &ep, 0);
397 if (errno == 0
398 && (*constant && (*ep == '\0')) ) {
399 /* string conversion went without issue */
400 /* validate it will fit in a word */
401 if (l > 0xffff) {
402 DEBUG_PRINTFQ("is out of range\n");
403 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
404 return -1;
405 }
406
407 /* seems fine */
408 *nextword = l & 0xffff;
409 *nextwordused += 1;
410 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
411 return 0x10 | register_enumerate_(*reg);
412 } else if (errno) {
413 DEBUG_PRINTFQ("is out of range\n");
414 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
415 return -1;
416 }
417
418 /* what? still here? assume it's a label, I guess */
419 /* try to populate nextword with label address */
420 if (label_addr_(labels, operand, nextword)) {
421 DEBUG_PRINTFQ("(deferred label resolution)\n");
422 *nextwordused += 1;
423 return -2;
424 }
425 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
426 *nextwordused += 1;
427 return 0x10 | register_enumerate_(*reg);
428 }
429
430 /* it must just be a dereferenced literal then */
431
432 errno = 0;
433 l = strtoul(operand, &ep, 0);
434 if (errno == 0
435 && (*operand && (*ep == '\0')) ) {
436 /* string conversion went without issue */
437 /* validate it will fit in a word */
438 if (l > 0xffff) {
439 DEBUG_PRINTFQ("is out of range\n");
440 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
441 return -1;
442 }
443
444 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
445 *nextword = l & 0xffff;
446 *nextwordused += 1;
447 return 0x1e;
448 } else if (errno) {
449 DEBUG_PRINTFQ("is out of range\n");
450 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
451 }
452
453 /* not a number? try a label */
454 if (label_addr_(labels, operand, nextword)) {
455 DEBUG_PRINTFQ("(deferred label resolution)\n");
456 *nextwordused += 1;
457 return -2;
458 }
459 DEBUG_PRINTFQ("is a dereferenced label\n");
460 *nextwordused += 1;
461 return 0x1e;
462 }
463
464 /* left with a literal or a label, then */
465
466 errno = 0;
467 l = strtoul(operand, &ep, 0);
468 if (errno == 0
469 || (*operand && (*ep == '\0')) ) {
470 if (l > 0xffff) {
471 DEBUG_PRINTFQ("is out of range\n");
472 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
473 return -1;
474 }
475
476 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
477 if (l < 0x20) {
478 return l + 0x20;
479 }
480
481 *nextword = l & 0xffff;
482 *nextwordused += 1;
483 return 0x1f;
484 }
485
486 /* try to populate nextword with label address */
487 if (label_addr_(labels, operand, nextword)) {
488 DEBUG_PRINTFQ("(deferred label resolution)\n");
489 /* assume non-small literal value */
490 *nextwordused += 1;
491 return -2;
492 }
493
494 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
495 if (*nextword < 0x20 && allow_short_labels) {
496 DEBUG_PRINTF("small value label win\n");
497 return (0x20 + *nextword) & 0x3f;
498 }
499
500 *nextwordused += 1;
501 return 0x1f;
502 }
503
504 /* prints an instruction's assembly */
505 static inline
506 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
507 struct operand_ *o;
508 int r;
509
510 if (with_label)
511 r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
512 else
513 r = printf("%3s", i->opcode);
514
515 for (o = i->operands; o; o = o->next)
516 r += printf(" %s%s", o->operand, o->next ? "," : "");
517
518 return r;
519 }
520
521 /* tokenize_line_
522 * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
523 * [label] instruction [operand[,operand[,...]]]
524 * Does no validation of contents of any of these tokens, as of yet.
525 */
526 static
527 int tokenize_line_(char *line, struct instruction_ **next_instr) {
528 const char const *whitespace = " \t\n";
529 const char const *quotes = "\"'`";
530 struct instruction_ *instr = NULL;
531 char *x, *st, *qt;
532 char *label, *opcode;
533 struct operand_ *operand_list = NULL;
534 struct operand_ **operand_tail = &operand_list;
535 size_t instr_words_needed = 0;
536
537 assert(line);
538 assert(next_instr);
539
540 *next_instr = NULL;
541
542 /* strip leading whitespace */
543 line += strspn(line, whitespace);
544 if (*line == '\0')
545 return 0;
546
547 /* set first bare ';' to '\0', thus isolating any comments */
548 /* here we only care about the side-effect of truncating the first separator character */
549 (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
550 /* we don't care if there was an unmatched quote at this point, let's see what happens */
551 if (*line == '\0')
552 return 0;
553
554 /* carve off the first token, determine if it is a label */
555 x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
556 if (x == NULL || *x == '\0')
557 return 0;
558 if (qt) {
559 /* labels could contain an unmatched quote character, I guess? */
560 qt = NULL;
561 }
562
563 /* we have something, try to make sense of what it is */
564
565 #ifdef NON_SPEC_LABELS
566 /* I want my labels like 'label:' */
567 if ( *(x + strlen(line) - 1) == ':' ) {
568 *(x + strlen(line) - 1) = '\0';
569 DEBUG_PRINTF("label: %s\n", x);
570
571 label = x;
572
573 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
574 } else {
575 label = NULL;
576 opcode = x;
577 }
578 #endif /* NON_SPEC_LABELS */
579
580 /* spec gives example of labels as ':label' */
581 if (*x == ':') {
582 *x = '\0';
583 x++;
584 label = x;
585 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
586 } else {
587 label = NULL;
588 opcode = x;
589 }
590 /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
591
592 if (opcode && *opcode) {
593 /* if we have an opcode, we'll need at least one word to compile instruction */
594 instr_words_needed++;
595
596 while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
597 struct operand_ *new_operand;
598 char *y;
599
600 DEBUG_PRINTF("considering operand '%s'\n", x);
601
602 /* trim whitespaces */
603 x += strspn(x, whitespace);
604
605 DEBUG_PRINTF("considering ftrim operand '%s'\n", x);
606
607 if (*x) {
608 for (y = x + strlen(x) - 1; *y; y--) {
609 if (strchr(whitespace, *y)) {
610 *y = '\0';
611 }
612 }
613 }
614 /* nothing left? */
615 if (*x == '\0') {
616 fprintf(stderr, "ignoring null operand in line %zu\n", instr->src_line);
617 continue;
618 }
619
620 DEBUG_PRINTF("found operand '%s'\n", x);
621
622 new_operand = malloc(sizeof *new_operand);
623 if (new_operand == NULL) {
624 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
625 return -1;
626 }
627
628 new_operand->operand = strdup(x);
629 if (new_operand->operand == NULL) {
630 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
631 return -1;
632 }
633
634 new_operand->next = NULL;
635
636 if (strchr(quotes, x[0])) {
637 /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
638 instr_words_needed += strlen(x) - 1;
639 }
640 instr_words_needed++;
641
642 *operand_tail = new_operand;
643 operand_tail = &(*operand_tail)->next;
644 }
645 }
646
647 DEBUG_PRINTF("allocating instruction with room for %zu bytes\n", instr_words_needed);
648
649 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
650 if (instr == NULL) {
651 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
652 return -1;
653 }
654
655 if (label) {
656 instr->label = strdup(label);
657 if (instr->label == NULL) {
658 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
659 return -1;
660 }
661 } else {
662 label = NULL;
663 }
664
665 if (opcode) {
666 instr->opcode = strdup(opcode);
667 if (instr->opcode == NULL) {
668 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
669 return -1;
670 }
671 } else {
672 opcode = NULL;
673 }
674
675 instr->operands = operand_list;
676
677 *next_instr = instr;
678
679 return 0;
680 }
681
682 /* try to generate bytecode for an instruction */
683 /* returns -1 on unrecoverable error */
684 static
685 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
686 unsigned int nwu = 0; /* number of words used */
687 unsigned int incomplete = 0;
688 int bits;
689 struct operand_ *o = i->operands;
690
691 if (opt_.verbose > 2) {
692 printf("%s: assembling %p ", __func__, i);
693 instruction_print_(i, 1);
694 printf("(line :%zu)\n", i->src_line);
695 }
696
697 if (i->ready) {
698 /* already assembled, nothing to do */
699 return 0;
700 }
701
702 /* special case DAT */
703 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
704 DEBUG_PRINTF("processing DAT...\n");
705
706 i->length = 0;
707
708 for ( /* */ ; o; o = o->next) {
709 size_t j, dat_len;
710 char *x;
711 unsigned long l;
712
713 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
714
715 /* is this a string? */
716 if ( (x = strchr("\"'`", o->operand[0])) ) {
717 dat_len = strlen(o->operand) - 1;
718 if (o->operand[dat_len] == *x) {
719 /* it is a string */
720 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
721
722 for (j = 0, x = o->operand + 1;
723 j < dat_len - 1;
724 j++, x++) {
725 i->instr_words[i->length] = *x;
726 i->length++;
727 }
728 /* Note that strings in DAT do not include their zero-terminators */
729 /* specify as 'DAT "string", 0' */
730 }
731 continue;
732 }
733
734 /* is this a number? */
735 char *ep;
736 errno = 0;
737 l = strtoul(o->operand, &ep, 0);
738 if (errno == 0
739 && (*o->operand && (*ep == '\0')) ) {
740 /* conversion succeeded */
741 if (l > 0xffff) {
742 fprintf(stderr, "value '%lu' out of range\n", l);
743 return -1;
744 }
745 i->instr_words[i->length] = l;
746 i->length++;
747 continue;
748 }
749
750 /* otherwise assume it's a label, even if we don't know what it is */
751 if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
752 DEBUG_PRINTF("(deferred label resolution)\n");
753 incomplete = 1;
754 }
755 i->length++;
756 }
757
758 if (incomplete) {
759 DEBUG_PRINTF("pending label address\n");
760 } else {
761 i->ready = 1;
762 }
763
764 return 0;
765 } /* end of DAT */
766
767 /* start with opcode bits */
768 bits = opcode_bits_(i->opcode);
769 if (bits < 0) {
770 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
771 for (o = i->operands; o; o = o->next)
772 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
773 fprintf(stderr, "'\n");
774 return -1;
775 }
776 i->instr_words[0] |= 0x0f & bits;
777
778 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
779 if ((bits & 0x0f) == 0) {
780 bits = nbi_opcode_bits_(i->opcode);
781 if (bits < 0) {
782 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
783 exit(EX_SOFTWARE);
784 }
785 } else {
786 if (o == NULL) {
787 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
788 return -1;
789 }
790 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
791 if (bits == -1) {
792 fprintf(stderr, "couldn't assemble instruction\n");
793 return -1;
794 } else if (bits == -2) {
795 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
796 /* keep going, but don't finalize until we can calculate label address */
797 incomplete = 1;
798 bits = 0;
799 }
800 o = o->next;
801 }
802 i->instr_words[0] |= (bits & 0x3f) << 4;
803
804 if (o == NULL) {
805 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
806 return -1;
807 }
808
809 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
810 if (bits == -1) {
811 fprintf(stderr, "couldn't assemble instruction\n");
812 return -1;
813 } else if (bits == -2) {
814 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
815 /* keep going, but don't finalize until we can calculate label address */
816 incomplete = 1;
817 bits = 0;
818 }
819 o = o->next;
820 i->instr_words[0] |= (bits & 0x3f) << 10;
821
822 if (o != NULL) {
823 fprintf(stderr, "too many operands\n");
824 return -1;
825 }
826
827 /* counting labels as words, we now know at least the maximum instruction length */
828
829 i->length = nwu + 1;
830
831 DEBUG_PRINTF("instruction words: [%u]", i->length);
832 for (bits = 0; bits <= (int)nwu; bits++)
833 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
834
835 if (incomplete) {
836 DEBUG_PRINTFQ(" (preliminary)");
837 } else {
838 i->ready = 1;
839 }
840
841 DEBUG_PRINTFQ("\n");
842
843 return 0;
844 }
845
846 /* parse_stream_
847 * read lines from stream f
848 * break each line into parts, populate parts into structures
849 */
850 static
851 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
852 struct instruction_ *instr, **instr_list_entry;
853 unsigned int line = 0;
854 int retval = 0;
855 char buf[0x4000];
856
857 buf[sizeof buf - 1] = '\0';
858
859 while (fgets(buf, sizeof buf, f)) {
860 line++;
861
862 if (buf[sizeof buf - 1] != '\0') {
863 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
864 retval = -1;
865 break;
866 }
867
868 if (tokenize_line_(buf, &instr)) {
869 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
870 retval = -1;
871 break;
872 }
873
874 if (instr) {
875 instr->src_line = line;
876 /* add to list of instructions */
877 instr_list_entry = dynarray_add(instructionps, &instr);
878 if (instr_list_entry == NULL) {
879 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
880 break;
881 }
882
883 if (instr->label) {
884 struct label_ new_label = {
885 .label = instr->label,
886 .instr = instr_list_entry,
887 .ready = 0,
888 .addr = 0,
889 };
890 if (label_find_(labels, instr->label)) {
891 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
892 break;
893 }
894
895 if (dynarray_add(labels, &new_label) == NULL) {
896 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
897 break;
898 }
899 label_addr_calculate_(instructionps, labels);
900 }
901
902 if (instr_assemble_(labels, instr, allow_short_labels)) {
903 fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
904 break;
905 }
906 }
907 }
908 if (ferror(f)) {
909 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
910 return -1;
911 }
912 if (! feof(f)) {
913 fprintf(stderr, "parsing aborted\n");
914 return -1;
915 }
916
917 return retval;
918 }
919
920 /* assemble_check_
921 * make a full pass over instruction list to resolve labels
922 */
923 static
924 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
925 int retval = 0;
926 size_t x;
927
928 /* fixing short labels .... */
929 /* by here we have our list of instructions and their maximum instruction lengths */
930 /* and we have a list of addresses, based on those maximum lengths */
931 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
932 /* and reassemble.. */
933 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
934 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
935
936 /* try this? keep another list of locations a label address is used */
937 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
938
939 DEBUG_PRINTF(" final pass of assembler...\n");
940 for (x = 0; x < instructionps->entries; x++) {
941 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
942 retval = instr_assemble_(labels, *instrp, allow_short_labels);
943 if (retval) {
944 fprintf(stderr, "instruction %zu failed to assemble\n", x);
945 return retval;
946 }
947 if (! (*instrp)->ready) {
948 fprintf(stderr, "instruction not resolvable\n");
949 return -1;
950 }
951 }
952
953 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
954 for (x = 0; x < labels->entries; x++) {
955 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
956 if (! l->ready)
957 retval |= -1;
958 if (opt_.verbose) {
959 printf("%3s0x%04x %-32s ",
960 l->ready ? "" : "*",
961 l->addr,
962 l->label);
963 instruction_print_(*(l->instr), 0);
964 printf("\n");
965 }
966 }
967
968 VERBOSE_PRINTF("\n");
969
970 if (retval)
971 fprintf(stderr, "some labels could not be resolved\n");
972
973 return retval;
974 }
975
976 /* output_
977 * write assembled words to named file
978 */
979 static
980 int output_(struct dynamic_array *instructionps, const char *filename) {
981 FILE *of = NULL;
982 struct instruction_ **instrp;
983 size_t i, r, total_words = 0;
984 size_t x;
985
986 if (! opt_.dryrun) {
987 of = fopen(filename, "w");
988 if (of == NULL) {
989 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
990 return -1;
991 }
992 }
993
994 for (i = 0; i < instructionps->entries; i++) {
995 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
996
997 if (opt_.verbose) {
998 int s;
999 s = instruction_print_(*instrp, 1);
1000 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
1001 for (x = 0; x < (*instrp)->length; x++) {
1002 printf(" %04x", (*instrp)->instr_words[x]);
1003 }
1004 printf("\n");
1005 }
1006
1007 if (of) {
1008 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
1009 if (r < (*instrp)->length) {
1010 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
1011 return -1;
1012 }
1013 }
1014 total_words += (*instrp)->length;
1015 }
1016
1017 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
1018 opt_.dryrun ? "assembled" : "wrote",
1019 i,
1020 total_words);
1021
1022 return 0;
1023 }
1024
/*
 * The two tables the assembler works from, created in main:
 *   instructionps_ - pointers to every instruction, in source order
 *   labels_        - every label encountered
 */
static struct dynamic_array *instructionps_ = NULL,
                            *labels_ = NULL;
1027
1028 int main(int argc, char *argv[]) {
1029 const char *out_filename = NULL;
1030 unsigned int allow_short_labels = 0;
1031 int c;
1032
1033 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
1034 switch (c) {
1035 case 'd':
1036 opt_.dryrun++;
1037 break;
1038
1039 case 's':
1040 allow_short_labels++;
1041 break;
1042
1043 case 'o':
1044 if (out_filename) {
1045 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1046 exit(EX_CANTCREAT);
1047 }
1048 out_filename = optarg;
1049 break;
1050
1051 case 'v':
1052 opt_.verbose++;
1053 break;
1054
1055 case 'h':
1056 usage_(argv[0], 1);
1057 exit(EX_OK);
1058
1059 default:
1060 usage_(argv[0], 0);
1061 exit(EX_USAGE);
1062 }
1063 }
1064
1065 argc -= optind;
1066 argv += optind;
1067
1068 if (out_filename == NULL)
1069 out_filename = out_filename_default_;
1070
1071 /* init tables */
1072 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1073 labels_ = dynarray_new(sizeof(struct label_), 256);
1074 if (instructionps_ == NULL
1075 || labels_ == NULL) {
1076 fprintf(stderr, "failed to initialize\n");
1077 exit(EX_OSERR);
1078 }
1079
1080 /* if filenames were specified, parse them instead of stdin */
1081 if (argc) {
1082 while (argc) {
1083 char *filename = *argv;
1084 FILE *f = fopen(filename, "r");
1085
1086 argc--, argv++;
1087
1088 if (f == NULL) {
1089 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1090 continue;
1091 }
1092
1093 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1094 c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1095 if (c)
1096 break;
1097 fclose(f);
1098 }
1099 } else {
1100 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1101 c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1102 }
1103 if (c) {
1104 fprintf(stderr, "could not parse input, aborting\n");
1105 exit(EX_DATAERR);
1106 }
1107
1108 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1109 fprintf(stderr, "errors prevented assembly\n");
1110 exit(EX_DATAERR);
1111 }
1112
1113 if (output_(instructionps_, out_filename)) {
1114 fprintf(stderr, "failed to create output\n");
1115 exit(EX_OSERR);
1116 }
1117
1118 exit(EX_OK);
1119 }