fixing sloppy errors introduced with new strqtok
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sysexits.h>
7 #include <assert.h>
8
9 #include "dcpu16.h"
10 #include "common.h"
11
12 /*
13 * quick and dirty assembler for dcpu16
14 *
15 * Justin Wind <justin.wind@gmail.com>
16 * 2012 04 07 - implementation started
17 * 2012 04 10 - functional
18 * 2012 04 16 - support dat statements
19 *
20 * TODO
21 * needs ability to specify location for code or data
22 * short labels not correctly computed
23 */
24
/* version-control identification string, printed by the full usage screen */
static const char * const src_id_ = "$Id$";

/* name of the output file when -o is not given */
const char out_filename_default_[] = "a.out";

/* global invocation options */
struct options {
    unsigned int verbose;   /* verbosity level; values above 2 enable the DEBUG_ macros */
    unsigned int dryrun;    /* nonzero: assemble, but do not write the output file */
} opt_ = {
    .verbose = 0,
    .dryrun = 0,
};

/*
 * Diagnostic output helpers, all writing to stdout:
 *  DEBUG_PRINTF   - when verbose > 2, message prefixed with "DEBUG: "
 *  DEBUG_PRINTFQ  - when verbose > 2, no prefix (continues a DEBUG_PRINTF line)
 *  VERBOSE_PRINTF - whenever verbose is nonzero
 */
#define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0)
#define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0)
#define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0)
41
42 static
43 void usage_(char *prog, unsigned int full) {
44 FILE *f = full ? stdout : stderr;
45 char *x = strrchr(prog, '/');
46
47 if (x && *(x + 1))
48 prog = x + 1;
49
50 if (full)
51 fprintf(f, "%s -- \n\n",
52 prog);
53
54 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
55 prog);
56
57 if (full) {
58 fprintf(f, "\nOptions:\n"
59 "\t-h -- this screen\n"
60 "\t-o <file> -- output to <file> [default: %s]\n"
61 "\t-s -- allow short labels in instruction words\n"
62 "\t-d -- dry run, print results, do not write to file\n"
63 "\t-v -- verbose output\n",
64 out_filename_default_);
65
66 fprintf(f, "\n%78s\n",
67 src_id_);
68 }
69 }
70
71
/* one operand of an instruction; an instruction's operands form a singly-linked list */
struct operand_ {
    struct operand_ *next;      /* following operand, NULL at end of list */
    char *operand;              /* operand text as tokenized from the source line */
};
77
78 /* keep an array of instructions as we read them in */
79 struct instruction_ {
80 size_t src_line;
81 char *label; /* set if a label points here */
82 char *opcode; /* tokenized instruction text */
83 struct operand_ *operands; /* list of operands */
84 unsigned int ready : 1; /* bytecode computed? */
85 unsigned int length; /* number of words of bytecode */
86 DCPU16_WORD instr_words[];
87 };
88
89 /* keep an array of labels, indexed back to their instruction locations */
90 struct label_ {
91 char *label; /* name of label */
92 struct instruction_ **instr; /* pointer into array of instructions */
93 unsigned int ready : 1; /* do we know where this label is yet? */
94 DCPU16_WORD addr;
95 };
96
97
98 /* locate and return the label entry matching name */
99 static
100 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
101 size_t x;
102
103 for (x = 0; x < labels->entries; x++) {
104 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
105 if (strcmp(l->label, name) == 0)
106 return l;
107 }
108 return NULL;
109 }
110
111
112 /* if a label has a validly-calculated address, fetch it */
113 static
114 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
115 struct label_ *l;
116
117 if ( (l = label_find_(labels, name)) == NULL )
118 return -1;
119 if (! l->ready)
120 return -2;
121 *addr = l->addr;
122 return 0;
123 }
124
125
126 /* attempt to determine the addresses of all labels */
127 static
128 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
129 size_t i;
130
131 /* for each label.. */
132 for (i = 0; i < labels->entries; i++) {
133 struct label_ *l;
134 struct instruction_ **instr;
135 unsigned int word_count = 0;
136
137 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
138
139 /* if it's already calculated, great. */
140 if (l->ready)
141 continue;
142
143 /*
144 * starting at the instruction for this label,
145 * walk backwards through the list of instructions
146 * until we get to the start or a known prior label address.
147 * update our label with the freshly calculated addr
148 */
149 for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
150 instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
151 instr--) {
152
153 if ((*instr)->ready)
154 DEBUG_PRINTF("%s: instr not ready\n", __func__);
155 word_count += (*instr)->length;
156
157 /* have we come across an instruction which a label points to?
158 it should already be calculated, so just add that on and be done */
159 if ((*instr)->label
160 && strcmp((*instr)->label, l->label)) {
161 DCPU16_WORD addr;
162
163 if (label_addr_(labels, (*instr)->label, &addr)) {
164 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
165 (*instr)->label,
166 l->label);
167 continue;
168 }
169
170 word_count += addr;
171 break;
172 }
173 }
174 l->addr = word_count;
175 l->ready = 1;
176 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
177 }
178 }
179
180
/* opcode_bits_
 *  map a basic instruction mnemonic (compared case-insensitively) to its opcode value
 *  returns the value from the table below, 0x00 meaning a non-basic instruction
 *  returns -1, after complaining on stderr, when the mnemonic is not in the table
 */
static
int opcode_bits_(char *opcode) {
    static struct {
        char op[4];
        char value;
    } basic_opcodes[] = {
        { "JSR", 0x00 },
        /* { "future nbi instruction", 0x00 }, */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
    };
    const size_t count = sizeof basic_opcodes / sizeof basic_opcodes[0];
    size_t n;

    for (n = 0; n < count; n++) {
        if (strcasecmp(basic_opcodes[n].op, opcode) == 0)
            return basic_opcodes[n].value;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
220
/* nbi_opcode_bits_
 *  generate the six bits for a given non-basic opcode
 *  (these occupy the first-operand field of an instruction whose opcode is 0x00)
 *  the mnemonic is compared case-insensitively
 *  returns -1, after complaining on stderr, when the mnemonic is not in the table
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 }
    }, *o;

    /* the table ends at the entry with an empty name */
    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            break;
    }

    if (o->op[0] == '\0') {
        /* report the mnemonic we were given, not the (empty) sentinel entry */
        fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
        return -1;
    }

    return o->value;
}
245
/* register_enumerate_
 *  convert a register character like 'x' to its value like 0x03
 *  accepts either case of A, B, C, X, Y, Z, I, J (values 0 through 7)
 *  any other character, including NUL, is reported on stderr and yields (unsigned)-1
 */
static inline
unsigned int register_enumerate_(char r) {
    const char regs[] = "AaBbCcXxYyZzIiJj";
    /* strchr() would match the terminator for NUL, so rule that out first */
    const char *x = (r != '\0') ? strchr(regs, r) : NULL;

    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned char)r);
    return (unsigned int)-1;
}
258
/* buf_strip_chars_
 *  delete, in place, every character of buf which appears in chars
 *  the surviving characters keep their order and buf stays zero-terminated
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    char *dst = buf;
    const char *src;

    for (src = buf; *src != '\0'; src++) {
        if (strchr(chars, *src) == NULL)
            *dst++ = *src;
    }

    *dst = '\0';
}
274
275
276 /* value_bits_
277 * generate the six bits for a given operand string
278 * returns -1 if it could not parse the operand
279 * returns -2 if it could not parse the operand due to an unresolved label
280 * notes: nextword may be overwritten even if it's not used in final instruction
281 */
282 static
283 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
284 static char *operand = NULL;
285 static size_t operand_sz = 0;
286
287 unsigned long l;
288 char *o, *ep;
289
290 /*
291 Our operand working buffer shouldn't ever need to be too big,
292 but DAT might blow that assumption.
293 */
294 if (operand_sz <= strlen(operand_orig)) {
295 void *tmp_ptr;
296 size_t new_sz = strlen(operand_orig);
297
298 if (new_sz < 256)
299 new_sz = 256;
300 new_sz += 256;
301
302 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
303 tmp_ptr = realloc(operand, new_sz);
304 if (tmp_ptr == NULL) {
305 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
306 return -1;
307 }
308 operand = tmp_ptr;
309 operand_sz = new_sz;
310 }
311
312 o = strcpy(operand, operand_orig);
313
314 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
315
316 /* this is a very stupid parser */
317
318 /* first, let's trim all whitespace out of string at once to make parsing easier */
319 buf_strip_chars_(operand, " \t\n");
320
321 /* single character might match a register */
322 if (strlen(operand) == 1
323 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
324 DEBUG_PRINTFQ("is register %c\n", *operand);
325 return register_enumerate_(*operand);
326 }
327
328 /* easy matches */
329 if (strcasecmp(operand, "POP") == 0) {
330 DEBUG_PRINTFQ("is POP\n");
331 return 0x18;
332 }
333 if (strcasecmp(operand, "PUSH") == 0) {
334 DEBUG_PRINTFQ("is PUSH\n");
335 return 0x19;
336 }
337 if (strcasecmp(operand, "PEEK") == 0) {
338 DEBUG_PRINTFQ("is PEEK\n");
339 return 0x1a;
340 }
341 if (strcasecmp(operand, "SP") == 0) {
342 DEBUG_PRINTFQ("is register SP\n");
343 return 0x1b;
344 }
345 if (strcasecmp(operand, "PC") == 0) {
346 DEBUG_PRINTFQ("is register PC\n");
347 return 0x1c;
348 }
349 if (strcasecmp(operand, "O") == 0) {
350 DEBUG_PRINTFQ("is register O\n");
351 return 0x1d;
352 }
353
354 /* is the operand [bracketed]? */
355 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
356 /* eat the brackets */
357 operand[strlen(operand) - 1] = '\0';
358 operand++;
359
360 /* is it [register]? */
361 if (strlen(operand) == 1
362 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
363 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
364 return 0x08 | register_enumerate_(*operand);
365 }
366
367 /* is it [register+something]? */
368 if ( (ep = strchr(operand, '+')) ) {
369 char *reg;
370 char *constant;
371
372 /* eat the plus */
373 *ep = '\0';
374 ep++;
375
376 /* figure out which one is which */
377 if (strlen(ep) == 1
378 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
379 reg = ep;
380 constant = operand;
381 } else if (strlen(operand) == 1
382 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
383 reg = operand;
384 constant = ep;
385 } else {
386 DEBUG_PRINTFQ("is unparsable\n");
387 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
388 return -1;
389 }
390
391 /* check if something is understandable as a value */
392 errno = 0;
393 l = strtoul(constant, &ep, 0);
394 if (errno == 0
395 && (*constant && (*ep == '\0')) ) {
396 /* string conversion went without issue */
397 /* validate it will fit in a word */
398 if (l > 0xffff) {
399 DEBUG_PRINTFQ("is out of range\n");
400 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
401 return -1;
402 }
403
404 /* seems fine */
405 *nextword = l & 0xffff;
406 *nextwordused += 1;
407 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
408 return 0x10 | register_enumerate_(*reg);
409 } else if (errno) {
410 DEBUG_PRINTFQ("is out of range\n");
411 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
412 return -1;
413 }
414
415 /* what? still here? assume it's a label, I guess */
416 /* try to populate nextword with label address */
417 if (label_addr_(labels, operand, nextword)) {
418 DEBUG_PRINTFQ("(deferred label resolution)\n");
419 *nextwordused += 1;
420 return -2;
421 }
422 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
423 *nextwordused += 1;
424 return 0x10 | register_enumerate_(*reg);
425 }
426
427 /* it must just be a dereferenced literal then */
428
429 errno = 0;
430 l = strtoul(operand, &ep, 0);
431 if (errno == 0
432 && (*operand && (*ep == '\0')) ) {
433 /* string conversion went without issue */
434 /* validate it will fit in a word */
435 if (l > 0xffff) {
436 DEBUG_PRINTFQ("is out of range\n");
437 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
438 return -1;
439 }
440
441 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
442 *nextword = l & 0xffff;
443 *nextwordused += 1;
444 return 0x1e;
445 } else if (errno) {
446 DEBUG_PRINTFQ("is out of range\n");
447 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
448 }
449
450 /* not a number? try a label */
451 if (label_addr_(labels, operand, nextword)) {
452 DEBUG_PRINTFQ("(deferred label resolution)\n");
453 *nextwordused += 1;
454 return -2;
455 }
456 DEBUG_PRINTFQ("is a dereferenced label\n");
457 *nextwordused += 1;
458 return 0x1e;
459 }
460
461 /* left with a literal or a label, then */
462
463 errno = 0;
464 l = strtoul(operand, &ep, 0);
465 if (errno == 0
466 || (*operand && (*ep == '\0')) ) {
467 if (l > 0xffff) {
468 DEBUG_PRINTFQ("is out of range\n");
469 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
470 return -1;
471 }
472
473 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
474 if (l < 0x20) {
475 return l + 0x20;
476 }
477
478 *nextword = l & 0xffff;
479 *nextwordused += 1;
480 return 0x1f;
481 }
482
483 /* try to populate nextword with label address */
484 if (label_addr_(labels, operand, nextword)) {
485 DEBUG_PRINTFQ("(deferred label resolution)\n");
486 /* assume non-small literal value */
487 *nextwordused += 1;
488 return -2;
489 }
490
491 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
492 if (*nextword < 0x20 && allow_short_labels) {
493 DEBUG_PRINTF("small value label win\n");
494 return (0x20 + *nextword) & 0x3f;
495 }
496
497 *nextwordused += 1;
498 return 0x1f;
499 }
500
501 /* prints an instruction's assembly */
502 static inline
503 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
504 struct operand_ *o;
505 int r;
506
507 if (with_label)
508 r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
509 else
510 r = printf("%3s", i->opcode);
511
512 for (o = i->operands; o; o = o->next)
513 r += printf(" %s%s", o->operand, o->next ? "," : "");
514
515 return r;
516 }
517
518 /* buf_tokenize_
519 * Parses the zero-terminated line of input 'buf' into a newly-allocated struct instruction_.
520 * [label] opcode [operand[,operand[,...]]]
521 * Does not yet validate if labels, opcodes, or operands are valid...
522 */
523 static
524 int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
525 const char const *sep = " \t\n";
526 const char const *quot = "'`\"";
527 struct instruction_ *instr = NULL;
528 struct operand_ *operand_list = NULL;
529 struct operand_ **o_next = &operand_list;
530 char *label = NULL,
531 *opcode = NULL;
532 char *x,
533 *y,
534 *st, *qt;
535 size_t instr_words_needed = 1;
536
537 assert(buf != NULL);
538 assert(next_instr != NULL);
539
540 *next_instr = NULL;
541
542 /* kill leading whitespace */
543 buf += strspn(buf, sep);
544
545 /* locate first non-quoted ';', ignore anything following it */
546 x = strqtok_r(buf, ";", '\\', quot, &qt, &st);
547 if (x == NULL)
548 return 0;
549 if (qt) {
550 fprintf(stderr, "unmatched %c-quote\n", *qt);
551 return -1;
552 }
553 if (*buf == '\0')
554 return 0;
555
556 /* kill trailing whitespace */
557 for (x = buf + strlen(buf) - 1; *x && strchr(sep, *x); x--)
558 *x = '\0';
559 if (*buf == '\0')
560 return 0;
561
562 DEBUG_PRINTF("trimmed buf: '%s'\n", buf);
563
564 /* determine if first token is label, opcode, or we just have a blank line to ignore */
565 x = strqtok_r(buf, sep, '\\', quot, &qt, &st);
566 if (x == NULL || *x == '\0')
567 return 0;
568 if (qt) {
569 fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
570 return -1;
571 }
572
573 /* I want c-style labels in my asm, but example in spec uses : in prefix rather than postfix */
574 #ifdef NON_SPEC_LABELS
575 /* labels end with :, otherwise its an opcode */
576 y = x + strlen(x) - 1;
577 if (*y == ':') {
578 DEBUG_PRINTF("found label '%s'\n", y);
579 *y = '\0';
580 label = x;
581 opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
582 if (qt) {
583 fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
584 return -1;
585 }
586 } else {
587 label = NULL;
588 opcode = x;
589 }
590 #else /* NON_SPEC_LABELS */
591 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
592 /* otherwise, it's an opcode */
593 if (*x == ':') {
594 DEBUG_PRINTF("found label '%s'\n", x);
595 label = x + 1;
596 opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
597 if (qt) {
598 fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
599 return -1;
600 }
601 } else {
602 label = NULL;
603 opcode = x;
604 }
605 #endif /* NON_SPEC_LABELS */
606
607 if ( !label && (!opcode || !*opcode) ) {
608 DEBUG_PRINTF("no label nor instruction?\n");
609 return 0;
610 }
611
612 DEBUG_PRINTF("label:'%s' opcode:'%s' operands:'%s'\n", label, opcode, st);
613
614 /*
615 While normal instructions just have comma-separated operands,
616 DAT can be followed by comma-separated list of:
617 label, to be resolved to address
618 value, like 0xffff
619 string, "quoted", characters to be rendered into low-byte of words
620 */
621
622 while ( (x = strqtok_r(NULL, ",", '\\', quot, &qt, &st)) ) {
623 DEBUG_PRINTF("\tx:'%s' qt:'%s' st:'%s'\n", x, qt, st);
624
625 if (qt) {
626 fprintf(stderr, "unmatched %c-quote '%s'\n", *qt, qt);
627 return -1;
628 }
629
630 /* trim trailing whitespace */
631 y = x + strlen(x) - 1;
632 while (strchr(sep, *y)) {
633 *y = '\0';
634 y--;
635 }
636
637 /* new operand to append to list */
638 *o_next = malloc(sizeof **o_next);
639 if (*o_next == NULL) {
640 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
641 return -1;
642 }
643
644 /* assume an operand takes up one word, unless it's a string */
645 /* if it's a string, it comes with quotes, which will get stripped, but will include trailing zero */
646 instr_words_needed += (*x == '"') ? strlen(x) - 1 : 1;
647
648 (*o_next)->operand = strdup(x);
649 if ((*o_next)->operand == NULL) {
650 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
651 return -1;
652 }
653 (*o_next)->next = NULL;
654 o_next = &((*o_next)->next);
655 }
656
657 DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed);
658
659 /* extra room for assembled words */
660 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
661 if (instr == NULL) {
662 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
663 return -1;
664 }
665
666 instr->label = label ? strdup(label) : NULL;
667 instr->opcode = opcode ? strdup(opcode) : NULL;
668 instr->operands = operand_list;
669
670 *next_instr = instr;
671
672 return 0;
673 }
674
675 /* try to generate bytecode for an instruction */
676 /* returns -1 on unrecoverable error */
677 static
678 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
679 unsigned int nwu = 0; /* number of words used */
680 unsigned int incomplete = 0;
681 int bits;
682 struct operand_ *o = i->operands;
683
684 if (opt_.verbose > 2) {
685 printf("%s: assembling %p ", __func__, i);
686 instruction_print_(i, 1);
687 printf("(line :%zu)\n", i->src_line);
688 }
689
690 if (i->ready) {
691 /* already assembled, nothing to do */
692 return 0;
693 }
694
695 /* special case DAT */
696 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
697 DEBUG_PRINTF("processing DAT...\n");
698
699 i->length = 0;
700
701 for ( /* */ ; o; o = o->next) {
702 size_t j, dat_len;
703 char *x;
704 unsigned long l;
705
706 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
707
708 /* is this a string? */
709 if ( (x = strchr("\"'`", o->operand[0])) ) {
710 dat_len = strlen(o->operand) - 1;
711 if (o->operand[dat_len] == *x) {
712 /* it is a string */
713 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
714
715 for (j = 0, x = o->operand + 1;
716 j < dat_len - 1;
717 j++, x++) {
718 i->instr_words[i->length] = *x;
719 i->length++;
720 }
721 /* Note that strings in DAT do not include their zero-terminators */
722 /* specify as 'DAT "string", 0' */
723 }
724 continue;
725 }
726
727 /* is this a number? */
728 char *ep;
729 errno = 0;
730 l = strtoul(o->operand, &ep, 0);
731 if (errno == 0
732 && (*o->operand && (*ep == '\0')) ) {
733 /* conversion succeeded */
734 if (l > 0xffff) {
735 fprintf(stderr, "value '%lu' out of range\n", l);
736 return -1;
737 }
738 i->instr_words[i->length] = l;
739 i->length++;
740 continue;
741 }
742
743 /* otherwise assume it's a label, even if we don't know what it is */
744 if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
745 DEBUG_PRINTF("(deferred label resolution)\n");
746 incomplete = 1;
747 }
748 i->length++;
749 }
750
751 if (incomplete) {
752 DEBUG_PRINTF("pending label address\n");
753 } else {
754 i->ready = 1;
755 }
756
757 return 0;
758 } /* end of DAT */
759
760 /* start with opcode bits */
761 bits = opcode_bits_(i->opcode);
762 if (bits < 0) {
763 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
764 for (o = i->operands; o; o = o->next)
765 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
766 fprintf(stderr, "'\n");
767 return -1;
768 }
769 i->instr_words[0] |= 0x0f & bits;
770
771 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
772 if ((bits & 0x0f) == 0) {
773 bits = nbi_opcode_bits_(i->opcode);
774 if (bits < 0) {
775 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
776 exit(EX_SOFTWARE);
777 }
778 } else {
779 if (o == NULL) {
780 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
781 return -1;
782 }
783 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
784 if (bits == -1) {
785 fprintf(stderr, "couldn't assemble instruction\n");
786 return -1;
787 } else if (bits == -2) {
788 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
789 /* keep going, but don't finalize until we can calculate label address */
790 incomplete = 1;
791 bits = 0;
792 }
793 o = o->next;
794 }
795 i->instr_words[0] |= (bits & 0x3f) << 4;
796
797 if (o == NULL) {
798 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
799 return -1;
800 }
801
802 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
803 if (bits == -1) {
804 fprintf(stderr, "couldn't assemble instruction\n");
805 return -1;
806 } else if (bits == -2) {
807 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
808 /* keep going, but don't finalize until we can calculate label address */
809 incomplete = 1;
810 bits = 0;
811 }
812 o = o->next;
813 i->instr_words[0] |= (bits & 0x3f) << 10;
814
815 if (o != NULL) {
816 fprintf(stderr, "too many operands\n");
817 return -1;
818 }
819
820 /* counting labels as words, we now know at least the maximum instruction length */
821
822 i->length = nwu + 1;
823
824 DEBUG_PRINTF("instruction words: [%u]", i->length);
825 for (bits = 0; bits <= (int)nwu; bits++)
826 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
827
828 if (incomplete) {
829 DEBUG_PRINTFQ(" (preliminary)");
830 } else {
831 i->ready = 1;
832 }
833
834 DEBUG_PRINTFQ("\n");
835
836 return 0;
837 }
838
839 /* parse_stream_
840 * read lines from stream f
841 * break each line into parts, populate parts into structures
842 */
843 static
844 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
845 struct instruction_ *instr, **instr_list_entry;
846 unsigned int line = 0;
847 int retval = 0;
848 char buf[0x4000];
849
850 buf[sizeof buf - 1] = '\0';
851
852 while (fgets(buf, sizeof buf, f)) {
853 line++;
854
855 if (buf[sizeof buf - 1] != '\0') {
856 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
857 retval = -1;
858 break;
859 }
860
861 if (buf_tokenize_(buf, &instr)) {
862 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
863 retval = -1;
864 break;
865 }
866
867 if (instr) {
868 instr->src_line = line;
869 /* add to list of instructions */
870 instr_list_entry = dynarray_add(instructionps, &instr);
871 if (instr_list_entry == NULL) {
872 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
873 break;
874 }
875
876 if (instr->label) {
877 struct label_ new_label = {
878 .label = instr->label,
879 .instr = instr_list_entry,
880 .ready = 0,
881 .addr = 0,
882 };
883 if (label_find_(labels, instr->label)) {
884 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
885 break;
886 }
887
888 if (dynarray_add(labels, &new_label) == NULL) {
889 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
890 break;
891 }
892 label_addr_calculate_(instructionps, labels);
893 }
894
895 if (instr_assemble_(labels, instr, allow_short_labels)) {
896 fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
897 break;
898 }
899 }
900 }
901 if (ferror(f)) {
902 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
903 return -1;
904 }
905 if (! feof(f)) {
906 fprintf(stderr, "parsing aborted\n");
907 return -1;
908 }
909
910 return retval;
911 }
912
913 /* assemble_check_
914 * make a full pass over instruction list to resolve labels
915 */
916 static
917 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
918 int retval = 0;
919 size_t x;
920
921 /* fixing short labels .... */
922 /* by here we have our list of instructions and their maximum instruction lengths */
923 /* and we have a list of addresses, based on those maximum lengths */
924 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
925 /* and reassemble.. */
926 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
927 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
928
929 /* try this? keep another list of locations a label address is used */
930 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
931
932 DEBUG_PRINTF(" final pass of assembler...\n");
933 for (x = 0; x < instructionps->entries; x++) {
934 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
935 retval = instr_assemble_(labels, *instrp, allow_short_labels);
936 if (retval) {
937 fprintf(stderr, "instruction %zu failed to assemble\n", x);
938 return retval;
939 }
940 if (! (*instrp)->ready) {
941 fprintf(stderr, "instruction not resolvable\n");
942 return -1;
943 }
944 }
945
946 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
947 for (x = 0; x < labels->entries; x++) {
948 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
949 if (! l->ready)
950 retval |= -1;
951 if (opt_.verbose) {
952 printf("%3s0x%04x %-32s ",
953 l->ready ? "" : "*",
954 l->addr,
955 l->label);
956 instruction_print_(*(l->instr), 0);
957 printf("\n");
958 }
959 }
960
961 VERBOSE_PRINTF("\n");
962
963 if (retval)
964 fprintf(stderr, "some labels could not be resolved\n");
965
966 return retval;
967 }
968
969 /* output_
970 * write assembled words to named file
971 */
972 static
973 int output_(struct dynamic_array *instructionps, const char *filename) {
974 FILE *of = NULL;
975 struct instruction_ **instrp;
976 size_t i, r, total_words = 0;
977 size_t x;
978
979 if (! opt_.dryrun) {
980 of = fopen(filename, "w");
981 if (of == NULL) {
982 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
983 return -1;
984 }
985 }
986
987 for (i = 0; i < instructionps->entries; i++) {
988 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
989
990 if (opt_.verbose) {
991 int s;
992 s = instruction_print_(*instrp, 1);
993 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
994 for (x = 0; x < (*instrp)->length; x++) {
995 printf(" %04x", (*instrp)->instr_words[x]);
996 }
997 printf("\n");
998 }
999
1000 if (of) {
1001 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
1002 if (r < (*instrp)->length) {
1003 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
1004 return -1;
1005 }
1006 }
1007 total_words += (*instrp)->length;
1008 }
1009
1010 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
1011 opt_.dryrun ? "assembled" : "wrote",
1012 i,
1013 total_words);
1014
1015 return 0;
1016 }
1017
1018 static struct dynamic_array *instructionps_;
1019 static struct dynamic_array *labels_;
1020
1021 int main(int argc, char *argv[]) {
1022 const char *out_filename = NULL;
1023 unsigned int allow_short_labels = 0;
1024 int c;
1025
1026 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
1027 switch (c) {
1028 case 'd':
1029 opt_.dryrun++;
1030 break;
1031
1032 case 's':
1033 allow_short_labels++;
1034 break;
1035
1036 case 'o':
1037 if (out_filename) {
1038 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1039 exit(EX_CANTCREAT);
1040 }
1041 out_filename = optarg;
1042 break;
1043
1044 case 'v':
1045 opt_.verbose++;
1046 break;
1047
1048 case 'h':
1049 usage_(argv[0], 1);
1050 exit(EX_OK);
1051
1052 default:
1053 usage_(argv[0], 0);
1054 exit(EX_USAGE);
1055 }
1056 }
1057
1058 argc -= optind;
1059 argv += optind;
1060
1061 if (out_filename == NULL)
1062 out_filename = out_filename_default_;
1063
1064 /* init tables */
1065 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1066 labels_ = dynarray_new(sizeof(struct label_), 256);
1067 if (instructionps_ == NULL
1068 || labels_ == NULL) {
1069 fprintf(stderr, "failed to initialize\n");
1070 exit(EX_OSERR);
1071 }
1072
1073 /* if filenames were specified, parse them instead of stdin */
1074 if (argc) {
1075 while (argc) {
1076 char *filename = *argv;
1077 FILE *f = fopen(filename, "r");
1078
1079 argc--, argv++;
1080
1081 if (f == NULL) {
1082 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1083 continue;
1084 }
1085
1086 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1087 c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1088 if (c)
1089 break;
1090 fclose(f);
1091 }
1092 } else {
1093 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1094 c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1095 }
1096 if (c) {
1097 fprintf(stderr, "could not parse input, aborting\n");
1098 exit(EX_DATAERR);
1099 }
1100
1101 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1102 fprintf(stderr, "errors prevented assembly\n");
1103 exit(EX_DATAERR);
1104 }
1105
1106 if (output_(instructionps_, out_filename)) {
1107 fprintf(stderr, "failed to create output\n");
1108 exit(EX_OSERR);
1109 }
1110
1111 exit(EX_OK);
1112 }