generalized list handling of instructions and labels
[dcpu16] / as-dcpu16.c
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sysexits.h>
#include <unistd.h>

#include "dcpu16.h"
10
11 /*
12 * quick and dirty assembler for dcpu16
13 *
14 * Justin Wind <justin.wind@gmail.com>
15 * 2012 04 07 - implementation started
16 * 2012 04 10 - functional
17 *
18 * TODO
19 * needs ability to specify location for code or data
20 * short labels not correctly computed
21 */
22
/* RCS-style keyword placeholder; printed at the bottom of the full usage screen */
static const char * const src_id_ = "$Id$";

/* output file name used when no -o option is given */
const char out_filename_default_[] = "a.out";
26
/*
 * invocation options, shared by the whole program
 * both counters start at zero
 */
struct options {
    unsigned int verbose;   /* verbosity level */
    unsigned int dryrun;    /* non-zero: do not write an output file */
} opt_ = { 0, 0 };
35
/*
 * diagnostic output helpers, all keyed off opt_.verbose and all writing
 * to stdout; arguments are only evaluated when the message is printed
 */

/* debug message with a "DEBUG: " prefix, only when verbosity exceeds 2 */
#define DEBUG_PRINTF(...) \
    do { \
        if (opt_.verbose > 2) { \
            printf("DEBUG: "); \
            printf(__VA_ARGS__); \
        } \
    } while (0)

/* same threshold as DEBUG_PRINTF, without the prefix (continues a line) */
#define DEBUG_PRINTFQ(...) \
    do { \
        if (opt_.verbose > 2) \
            printf(__VA_ARGS__); \
    } while (0)

/* message shown at any non-zero verbosity */
#define VERBOSE_PRINTF(...) \
    do { \
        if (opt_.verbose) \
            printf(__VA_ARGS__); \
    } while (0)
39
40 static
41 void usage_(char *prog, unsigned int full) {
42 FILE *f = full ? stdout : stderr;
43 char *x = strrchr(prog, '/');
44
45 if (x && *(x + 1))
46 prog = x + 1;
47
48 if (full)
49 fprintf(f, "%s -- \n\n",
50 prog);
51
52 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
53 prog);
54
55 if (full) {
56 fprintf(f, "\nOptions:\n"
57 "\t-h -- this screen\n"
58 "\t-o <file> -- output to <file> [default: %s]\n"
59 "\t-s -- allow short labels in instruction words\n"
60 "\t-d -- dry run, print results, do not write to file\n"
61 "\t-v -- verbose output\n",
62 out_filename_default_);
63
64 fprintf(f, "\n%78s\n",
65 src_id_);
66 }
67 }
68
69
/*
 * one operand of an instruction
 * operands are chained into a singly-linked list in source order
 */
struct operand_ {
    struct operand_ *next;  /* following operand, NULL on the last one */
    char *operand;          /* tokenized operand text */
};
75
76 /* keep an array of instructions as we read them in */
77 struct instruction_ {
78 char *label; /* set if a label points here */
79 char *opcode; /* tokenized instruction text */
80 struct operand_ *operands; /* list of operands */
81 unsigned int ready : 1; /* bytecode computed? */
82 unsigned int length; /* number of words of bytecode */
83 DCPU16_WORD instr_words[];
84 };
85
86 /* keep an array of labels, indexed back to their instruction locations */
87 struct label_ {
88 char *label; /* name of label */
89 struct instruction_ **instr; /* pointer into array of instructions */
90 unsigned int ready : 1; /* do we know where this label is yet? */
91 DCPU16_WORD addr;
92 };
93
94
95 /* routines to support generic grow-able arrays */
96
/* bookkeeping for a generic, grow-able array of fixed-size entries */
struct dynamic_array_ {
    size_t entry_size;  /* size of one entry, in bytes */
    size_t grow_size;   /* number of entries added each time the array grows */
    size_t allocated;   /* number of entries there is currently room for */
    size_t entries;     /* number of entries in use */
    void *a;            /* the entries themselves, stored back to back */
};
104
/*
 * address (as char *) of entry number 'index' within dynamic array 'da'
 * da is the struct itself, not a pointer to it; no bounds checking is done
 * index is parenthesized so that expressions such as 'n + 1' scale correctly
 */
#define DYNARRAY_ITEM(da, index) ( (char *)(da).a + ( (da).entry_size * (index) ) )
106
107 /* allocate and initialize a new generic dynamic array */
108 static
109 struct dynamic_array_ *dynarray_new_(size_t entry_size, size_t grow_size) {
110 struct dynamic_array_ *da;
111
112 if (entry_size == 0 || grow_size == 0) {
113 fprintf(stderr, "%s: internal error: sizes cannot be zero\n", __func__);
114 exit(EX_SOFTWARE);
115 }
116
117 da = calloc(1, sizeof *da);
118 if (da == NULL) {
119 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
120 return NULL;
121 }
122
123 da->entry_size = entry_size;
124 da->grow_size = grow_size;
125
126 da->a = malloc(da->entry_size * da->grow_size);
127 if (da->a == NULL) {
128 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
129 }
130
131 da->allocated = grow_size;
132
133 DEBUG_PRINTF("allocated new dynarray:%p a:%p entry_size:%zu\n", da, da->a, da->entry_size);
134
135 return da;
136 }
137
138 /* copy item onto end of array */
139 static
140 void *dynarray_add_(struct dynamic_array_ *da, void *item) {
141 void *dst;
142
143 /* make room, make room */
144 if (da->entries == da->allocated) {
145 size_t new_allocated = da->allocated + da->grow_size;
146 void *tmp_ptr = realloc(da->a, new_allocated * da->entry_size);
147 if (tmp_ptr == NULL) {
148 fprintf(stderr, "%s():%s\n", "realloc", strerror(errno));
149 return NULL;
150 }
151 da->a = tmp_ptr;
152 da->allocated = new_allocated;
153
154 DEBUG_PRINTF("grew dynarray:%p\n", da);
155 }
156
157 dst = DYNARRAY_ITEM(*da, da->entries);
158 memcpy(dst, item, da->entry_size);
159
160 da->entries++;
161
162 DEBUG_PRINTF("added dynarray:%p entry:%zu item:%p\n", da, da->entries, item);
163
164 return dst;
165 }
166
167
168 /* locate and return the label entry matching name */
169 static
170 struct label_ *label_find_(struct dynamic_array_ *labels, char *name) {
171 size_t x;
172
173 for (x = 0; x < labels->entries; x++) {
174 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
175 if (strcmp(l->label, name) == 0)
176 return l;
177 }
178 return NULL;
179 }
180
181
182 /* if a label has a validly-calculated address, fetch it */
183 static
184 int label_addr_(struct dynamic_array_ *labels, char *name, DCPU16_WORD *addr) {
185 struct label_ *l;
186
187 if ( (l = label_find_(labels, name)) == NULL )
188 return -1;
189 if (! l->ready)
190 return -2;
191 *addr = l->addr;
192 return 0;
193 }
194
195
196 /* attempt to determine the addresses of all labels */
197 static
198 void label_addr_calculate_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels) {
199 size_t i;
200
201 /* for each label.. */
202 for (i = 0; i < labels->entries; i++) {
203 struct label_ *l;
204 struct instruction_ **instr;
205 unsigned int word_count = 0;
206
207 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
208
209 /* if it's already calculated, great. */
210 if (l->ready)
211 continue;
212
213 /*
214 * starting at the instruction for this label,
215 * walk backwards through the list of instructions
216 * until we get to the start or a known prior label address.
217 * update our label with the freshly calculated addr
218 */
219 for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
220 instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
221 instr--) {
222
223 word_count += (*instr)->length;
224
225 /* have we come across an instruction which a label points to?
226 it should already be calculated, so just add that on and be done */
227 if ((*instr)->label
228 && strcmp((*instr)->label, l->label)) {
229 DCPU16_WORD addr;
230
231 if (label_addr_(labels, (*instr)->label, &addr)) {
232 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
233 (*instr)->label,
234 l->label);
235 continue;
236 }
237
238 word_count += addr;
239 break;
240 }
241 }
242 l->addr = word_count;
243 l->ready = 1;
244 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
245 }
246 }
247
248 static
249 void instr_free_(struct instruction_ *i) {
250 if (i->label)
251 free(i->label);
252 if (i->opcode)
253 free(i->opcode);
254 while (i->operands) {
255 struct operand_ *o = i->operands;
256
257 i->operands = o->next;
258 free(o);
259 }
260
261 free(i);
262 }
263
/* opcode_bits_
 * map a basic opcode mnemonic (matched case-insensitively) to the nibble
 * which encodes it
 * returns the nibble, or -1 after printing a complaint if the mnemonic
 * is not in the table
 */
static
int opcode_bits_(char *opcode) {
    static struct {
        char op[4];
        char value;
    } table[] = {
        { "JSR", 0x00 },
        /* { "future nbi instruction", 0x00 }, */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
        { "", 0x00 } /* end of table */
    };
    size_t k;

    for (k = 0; table[k].op[0] != '\0'; k++) {
        if (!strcasecmp(table[k].op, opcode))
            return table[k].value;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
303
/* nbi_opcode_bits_
 * generate the six bits for a given nbi opcode (aka first operand to opcode 0x00)
 * the mnemonic is matched case-insensitively
 * returns the bits, or -1 after printing a complaint if the mnemonic is
 * not in the table
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 }
    }, *o;

    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            break;
    }

    if (o->op[0] == '\0') {
        /* o is the empty end marker here; name the text which failed to match */
        fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
        return -1;
    }

    return o->value;
}
328
/* register_enumerate_
 * convert register character like 'x' to value like 0x03
 * both upper and lower case letters are accepted
 * returns (unsigned int)-1 after printing a complaint for anything else,
 * including the NUL character
 */
static inline
unsigned int register_enumerate_(char r) {
    const char regs[] = "AaBbCcXxYyZzIiJj";
    /* strchr would happily match NUL against the terminator of regs */
    const char *x = (r != '\0') ? strchr(regs, r) : NULL;

    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned int)(unsigned char)r);
    return (unsigned int)-1;
}
341
/* buf_strip_chars_
 * remove, in place, every character of buf which also appears in chars
 * the remaining characters keep their order and buf stays 0-terminated
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    const char *in;
    char *out = buf;

    for (in = buf; *in != '\0'; in++) {
        /* copy down only the characters we are keeping */
        if (strchr(chars, *in) == NULL)
            *out++ = *in;
    }

    *out = '\0';
}
357
358 /* value_bits_
359 * generate the six bits for a given operand string
360 * returns -1 if it could not parse the operand
361 * returns -2 if it could not parse the operand due to an unresolved label
362 * notes: nextword may be overwritten even if it's not used in final instruction
363 */
364 static
365 int value_bits_(struct dynamic_array_ *labels, char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
366 static char *operand = NULL;
367 static size_t operand_sz = 0;
368
369 unsigned long l;
370 char *o, *ep;
371
372 /*
373 Our operand working buffer shouldn't ever need to be too big,
374 but DAT might blow that assumption.
375 */
376 if (operand_sz <= strlen(operand_orig)) {
377 void *tmp_ptr;
378 size_t new_sz = strlen(operand_orig);
379
380 if (new_sz < 256)
381 new_sz = 256;
382 new_sz += 256;
383
384 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
385 tmp_ptr = realloc(operand, new_sz);
386 if (tmp_ptr == NULL) {
387 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
388 return -1;
389 }
390 operand = tmp_ptr;
391 operand_sz = new_sz;
392 }
393
394 o = strcpy(operand, operand_orig);
395
396 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
397
398 /* this is a very stupid parser */
399
400 /* first, let's trim all whitespace out of string at once to make parsing easier */
401 buf_strip_chars_(operand, " \t\n");
402
403 /* single character might match a register */
404 if (strlen(operand) == 1
405 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
406 DEBUG_PRINTFQ("is register %c\n", *operand);
407 return register_enumerate_(*operand);
408 }
409
410 /* easy matches */
411 if (strcasecmp(operand, "POP") == 0) {
412 DEBUG_PRINTFQ("is POP\n");
413 return 0x18;
414 }
415 if (strcasecmp(operand, "PUSH") == 0) {
416 DEBUG_PRINTFQ("is PUSH\n");
417 return 0x19;
418 }
419 if (strcasecmp(operand, "PEEK") == 0) {
420 DEBUG_PRINTFQ("is PEEK\n");
421 return 0x1a;
422 }
423 if (strcasecmp(operand, "SP") == 0) {
424 DEBUG_PRINTFQ("is register SP\n");
425 return 0x1b;
426 }
427 if (strcasecmp(operand, "PC") == 0) {
428 DEBUG_PRINTFQ("is register PC\n");
429 return 0x1c;
430 }
431 if (strcasecmp(operand, "O") == 0) {
432 DEBUG_PRINTFQ("is register O\n");
433 return 0x1d;
434 }
435
436 /* is the operand [bracketed]? */
437 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
438 /* eat the brackets */
439 operand[strlen(operand) - 1] = '\0';
440 operand++;
441
442 /* is it [register]? */
443 if (strlen(operand) == 1
444 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
445 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
446 return 0x08 | register_enumerate_(*operand);
447 }
448
449 /* is it [register+something]? */
450 if ( (ep = strchr(operand, '+')) ) {
451 char *reg;
452 char *constant;
453
454 /* eat the plus */
455 *ep = '\0';
456 ep++;
457
458 /* figure out which one is which */
459 if (strlen(ep) == 1
460 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
461 reg = ep;
462 constant = operand;
463 } else if (strlen(operand) == 1
464 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
465 reg = operand;
466 constant = ep;
467 } else {
468 DEBUG_PRINTFQ("is unparsable\n");
469 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
470 return -1;
471 }
472
473 /* check if something is understandable as a value */
474 errno = 0;
475 l = strtoul(constant, &ep, 0);
476 if (errno == 0
477 && (*constant && (*ep == '\0')) ) {
478 /* string conversion went without issue */
479 /* validate it will fit in a word */
480 if (l > 0xffff) {
481 DEBUG_PRINTFQ("is out of range\n");
482 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
483 return -1;
484 }
485
486 /* seems fine */
487 *nextword = l & 0xffff;
488 *nextwordused += 1;
489 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
490 return 0x10 | register_enumerate_(*reg);
491 } else if (errno) {
492 DEBUG_PRINTFQ("is out of range\n");
493 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
494 return -1;
495 }
496
497 /* what? still here? assume it's a label, I guess */
498 /* try to populate nextword with label address */
499 if (label_addr_(labels, operand, nextword)) {
500 DEBUG_PRINTFQ("(deferred label resolution)\n");
501 *nextwordused += 1;
502 return -2;
503 }
504 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
505 *nextwordused += 1;
506 return 0x10 | register_enumerate_(*reg);
507 }
508
509 /* it must just be a dereferenced literal then */
510
511 errno = 0;
512 l = strtoul(operand, &ep, 0);
513 if (errno == 0
514 && (*operand && (*ep == '\0')) ) {
515 /* string conversion went without issue */
516 /* validate it will fit in a word */
517 if (l > 0xffff) {
518 DEBUG_PRINTFQ("is out of range\n");
519 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
520 return -1;
521 }
522
523 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
524 *nextword = l & 0xffff;
525 *nextwordused += 1;
526 return 0x1e;
527 } else if (errno) {
528 DEBUG_PRINTFQ("is out of range\n");
529 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
530 }
531
532 /* not a number? try a label */
533 if (label_addr_(labels, operand, nextword)) {
534 DEBUG_PRINTFQ("(deferred label resolution)\n");
535 *nextwordused += 1;
536 return -2;
537 }
538 DEBUG_PRINTFQ("is a dereferenced label\n");
539 *nextwordused += 1;
540 return 0x1e;
541 }
542
543 /* left with a literal or a label, then */
544
545 errno = 0;
546 l = strtoul(operand, &ep, 0);
547 if (errno == 0
548 || (*operand && (*ep == '\0')) ) {
549 if (l > 0xffff) {
550 DEBUG_PRINTFQ("is out of range\n");
551 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
552 return -1;
553 }
554
555 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
556 if (l < 0x20) {
557 return l + 0x20;
558 }
559
560 *nextword = l & 0xffff;
561 *nextwordused += 1;
562 return 0x1f;
563 }
564
565 /* try to populate nextword with label address */
566 if (label_addr_(labels, operand, nextword)) {
567 DEBUG_PRINTFQ("(deferred label resolution)\n");
568 /* assume non-small literal value */
569 *nextwordused += 1;
570 return -2;
571 }
572
573 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
574 if (*nextword < 0x20 && allow_short_labels) {
575 DEBUG_PRINTF("small value label win\n");
576 return (0x20 + *nextword) & 0x3f;
577 }
578
579 *nextwordused += 1;
580 return 0x1f;
581 }
582
583 /* prints an instruction's assembly */
584 static inline
585 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
586 struct operand_ *o;
587 int r;
588
589 if (with_label)
590 r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
591 else
592 r = printf("%3s", i->opcode);
593
594 for (o = i->operands; o; o = o->next)
595 r += printf(" %s%s", o->operand, o->next ? "," : "");
596
597 return r;
598 }
599
600 /* parse an instruction out of buf, create new instruction struct if seemingly valid */
601 /* does not actually check if instruction is valid yet */
602 /* buf must be 0-terminated */
603 static
604 int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
605 const char const *sep = " \t\n";
606 struct instruction_ *instr = NULL;
607 char *label = NULL,
608 *opcode = NULL,
609 *operand = NULL;
610
611 char *x,
612 *y,
613 *st;
614
615 assert(buf != NULL);
616 assert(next_instr != NULL);
617
618 *next_instr = NULL;
619
620 /* kill comments */
621 if ((x = strchr(buf, ';')) != NULL)
622 *x = '\0';
623 /* kill leading whitespace */
624 buf += strspn(buf, " \t\n");
625 /* kill trailing whitespace */
626 if (*buf) {
627 x = buf + strlen(buf);
628 while (strchr(" \t\n", *x)) {
629 *x = '\0';
630 x--;
631 }
632 }
633
634 if ((x = strrchr(buf, '\n')) != NULL)
635 *x = '\0';
636
637 /* determine if first token is label, opcode, or we just have a blank line to ignore */
638 x = strtok_r(buf, sep, &st);
639
640 /* empty line? nothing to do here. */
641 if (x == NULL)
642 return 0;
643
644 #ifdef OTHER_LABELS
645 /* labels end with :, otherwise its an opcode */
646 y = x + strlen(x) - 1;
647 if (*y == ':') {
648 *y = '\0';
649 label = x;
650 opcode = strtok_r(NULL, sep, &st);
651 }
652 #else /* OTHER_LABELS */
653 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
654 /* otherwise, it's an opcode */
655 if (*x == ':') {
656 label = x + 1;
657 opcode = strtok_r(NULL, sep, &st);
658 } else {
659 label = NULL;
660 opcode = x;
661 }
662 #endif /* OTHER_LABELS */
663
664 if (opcode) {
665 operand = st;
666 }
667
668 /* extra room for assembled words */
669 instr = calloc(1, 3 + sizeof *instr);
670 if (instr == NULL) {
671 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
672 return -1;
673 }
674
675 instr->label = label ? strdup(label) : NULL;
676 instr->opcode = opcode ? strdup(opcode) : NULL;
677
678 if (operand) {
679 struct operand_ **o_next = &instr->operands;
680
681 for (x = strtok_r(operand, ",", &st);
682 x;
683 x = strtok_r(NULL, ",", &st) ) {
684 *o_next = malloc(3 + sizeof **o_next); /* FIXME: handle this on the fly later */
685
686 if (*o_next == NULL) {
687 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
688 instr_free_(instr);
689 return -1;
690 }
691
692 /* trim */
693 x += strspn(x, " \t\n");
694 if (*x) {
695 y = x + strlen(x) - 1;
696 while (strchr(" \t\n", *y)) {
697 *y = '\0';
698 y--;
699 }
700 }
701
702 (*o_next)->operand = strdup(x);
703 (*o_next)->next = NULL;
704 o_next = &((*o_next)->next);
705 }
706 }
707
708 *next_instr = instr;
709
710 return 0;
711 }
712
713 /* try to generate bytecode for an instruction */
714 static
715 int instr_assemble_(struct dynamic_array_ *labels, struct instruction_ *i, unsigned int allow_short_labels) {
716 unsigned int nwu = 0; /* number of words used */
717 unsigned int incomplete = 0;
718 int bits;
719 struct operand_ *o = i->operands;
720
721 if (opt_.verbose > 2) {
722 printf("%s: assembling %p ", __func__, i);
723 instruction_print_(i, 1);
724 printf("\n");
725 }
726
727 if (i->ready) {
728 /* already assembled, nothing to do */
729 return 0;
730 }
731
732 /* special case DAT */
733 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
734 /* just dump operands into words, I guess */
735 fprintf(stderr, "FIXME unhandled raw data\n");
736 /* count total length of data.. */
737 /* realloc instruction */
738 /* populate words */
739 return 0;
740 }
741
742 /* start with opcode bits */
743 bits = opcode_bits_(i->opcode);
744 if (bits < 0) {
745 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
746 for (o = i->operands; o; o = o->next)
747 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
748 fprintf(stderr, "'\n");
749 return -1;
750 }
751 i->instr_words[0] |= 0x0f & bits;
752
753 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
754 if ((bits & 0x0f) == 0) {
755 bits = nbi_opcode_bits_(i->opcode);
756 if (bits < 0) {
757 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
758 exit(EX_SOFTWARE);
759 }
760 } else {
761 if (o == NULL) {
762 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
763 return -1;
764 }
765 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
766 if (bits == -1) {
767 fprintf(stderr, "couldn't assemble instruction\n");
768 return -1;
769 } else if (bits == -2) {
770 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
771 /* keep going, but don't finalize until we can calculate label address */
772 incomplete = 1;
773 bits = 0;
774 }
775 o = o->next;
776 }
777 i->instr_words[0] |= (bits & 0x3f) << 4;
778
779 if (o == NULL) {
780 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
781 return -1;
782 }
783
784 bits = value_bits_(labels, o->operand, i->instr_words + nwu + 1, &nwu, allow_short_labels);
785 if (bits == -1) {
786 fprintf(stderr, "couldn't assemble instruction\n");
787 return -1;
788 } else if (bits == -2) {
789 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
790 /* keep going, but don't finalize until we can calculate label address */
791 incomplete = 1;
792 bits = 0;
793 }
794 o = o->next;
795 i->instr_words[0] |= (bits & 0x3f) << 10;
796
797 if (o != NULL) {
798 fprintf(stderr, "too many operands\n");
799 return -1;
800 }
801
802 /* counting labels as words, we now know at least the maximum instruction length */
803
804 i->length = nwu + 1;
805
806 DEBUG_PRINTF("instruction words: [%u]", i->length);
807 for (bits = 0; bits <= (int)nwu; bits++)
808 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
809
810 if (incomplete) {
811 DEBUG_PRINTFQ(" (preliminary)");
812 } else {
813 i->ready = 1;
814 }
815
816 DEBUG_PRINTFQ("\n");
817
818 return 0;
819 }
820
821 /* parse_stream_
822 * read lines from stream f
823 * break each line into parts, populate parts into structures
824 */
825 static
826 int parse_stream_(FILE *f, const char *src, struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
827 struct instruction_ *instr, **instr_list_entry;
828 unsigned int line = 0;
829 int retval = 0;
830 char buf[0x4000];
831
832 buf[sizeof buf - 1] = '\0';
833
834 while (fgets(buf, sizeof buf, f)) {
835 line++;
836
837 if (buf[sizeof buf - 1] != '\0') {
838 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
839 retval = -1;
840 break;
841 }
842
843 if (buf_tokenize_(buf, &instr)) {
844 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
845 retval = -1;
846 break;
847 }
848
849 if (instr) {
850 /* add to list of instructions */
851 instr_list_entry = dynarray_add_(instructionps, &instr);
852 if (instr_list_entry == NULL) {
853 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
854 break;
855 }
856
857 if (instr->label) {
858 struct label_ new_label = {
859 .label = instr->label,
860 .instr = instr_list_entry,
861 .ready = 0,
862 .addr = 0,
863 };
864 if (label_find_(labels, instr->label)) {
865 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
866 break;
867 }
868
869 if (dynarray_add_(labels, &new_label) == NULL) {
870 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
871 break;
872 }
873 label_addr_calculate_(instructionps, labels);
874 }
875
876 instr_assemble_(labels, instr, allow_short_labels);
877 }
878 }
879 if (ferror(f)) {
880 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
881 return -1;
882 }
883 if (! feof(f)) {
884 fprintf(stderr, "parsing aborted\n");
885 return -1;
886 }
887
888 return retval;
889 }
890
891 /* assemble_check_
892 * make a full pass over instruction list to resolve labels
893 */
894 static
895 int assemble_check_(struct dynamic_array_ *instructionps, struct dynamic_array_ *labels, unsigned int allow_short_labels) {
896 int retval = 0;
897 size_t x;
898
899 DEBUG_PRINTF(" final pass of assembler...\n");
900 for (x = 0; x < instructionps->entries; x++) {
901 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
902 retval |= instr_assemble_(labels, *instrp, allow_short_labels);
903 if (retval) {
904 fprintf(stderr, "instruction %zu failed to assemble\n", x);
905 }
906 }
907
908 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
909 for (x = 0; x < labels->entries; x++) {
910 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
911 if (! l->ready)
912 retval |= -1;
913 if (opt_.verbose) {
914 printf("%3s0x%04x %-32s ",
915 l->ready ? "" : "*",
916 l->addr,
917 l->label);
918 instruction_print_(*(l->instr), 0);
919 printf("\n");
920 }
921 }
922
923 VERBOSE_PRINTF("\n");
924
925 if (retval)
926 fprintf(stderr, "some labels could not be resolved\n");
927
928 return retval;
929 }
930
931 static
932 int output_(struct dynamic_array_ *instructionps, const char *filename) {
933 FILE *of = NULL;
934 struct instruction_ **instrp;
935 size_t i, r, total_words = 0;
936 size_t x;
937
938 if (! opt_.dryrun) {
939 of = fopen(filename, "w");
940 if (of == NULL) {
941 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
942 return -1;
943 }
944 }
945
946 for (i = 0; i < instructionps->entries; i++) {
947 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
948
949 if (opt_.verbose) {
950 int s;
951 s = instruction_print_(*instrp, 1);
952 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
953 for (x = 0; x < (*instrp)->length; x++) {
954 printf(" %04x", (*instrp)->instr_words[x]);
955 }
956 printf("\n");
957 }
958
959 if (of) {
960 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
961 if (r < (*instrp)->length) {
962 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
963 return -1;
964 }
965 }
966 total_words += (*instrp)->length;
967 }
968
969 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
970 opt_.dryrun ? "assembled" : "wrote",
971 i,
972 total_words);
973
974 return 0;
975 }
976
977 static struct dynamic_array_ *instructionps_;
978 static struct dynamic_array_ *labels_;
979
980 int main(int argc, char *argv[]) {
981 const char *out_filename = NULL;
982 unsigned int allow_short_labels = 0;
983 int c;
984
985 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
986 switch (c) {
987 case 'd':
988 opt_.dryrun++;
989 break;
990
991 case 's':
992 allow_short_labels++;
993 break;
994
995 case 'o':
996 if (out_filename) {
997 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
998 exit(EX_CANTCREAT);
999 }
1000 out_filename = optarg;
1001 break;
1002
1003 case 'v':
1004 opt_.verbose++;
1005 break;
1006
1007 case 'h':
1008 usage_(argv[0], 1);
1009 exit(EX_OK);
1010
1011 default:
1012 usage_(argv[0], 0);
1013 exit(EX_USAGE);
1014 }
1015 }
1016
1017 argc -= optind;
1018 argv += optind;
1019
1020 if (out_filename == NULL)
1021 out_filename = out_filename_default_;
1022
1023 /* init tables */
1024 instructionps_ = dynarray_new_(sizeof (struct instruction_ *), 1024);
1025 labels_ = dynarray_new_(sizeof(struct label_), 256);
1026 if (instructionps_ == NULL
1027 || labels_ == NULL) {
1028 fprintf(stderr, "failed to initialize\n");
1029 exit(EX_OSERR);
1030 }
1031
1032 /* if filenames were specified, parse them instead of stdin */
1033 if (argc) {
1034 while (argc) {
1035 char *filename = *argv;
1036 FILE *f = fopen(filename, "r");
1037
1038 argc--, argv++;
1039
1040 if (f == NULL) {
1041 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1042 continue;
1043 }
1044
1045 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1046 parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1047
1048 fclose(f);
1049 }
1050 } else {
1051 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1052 parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1053 }
1054
1055 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1056 fprintf(stderr, "errors prevented assembly\n");
1057 exit(EX_DATAERR);
1058 }
1059
1060 if (output_(instructionps_, out_filename)) {
1061 fprintf(stderr, "failed to create output\n");
1062 exit(EX_OSERR);
1063 }
1064
1065 exit(EX_OK);
1066 }