f22e848867ce58d74c831dff22c0c6ab5e7a9042
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <strings.h>
6 #include <errno.h>
7 #include <sysexits.h>
8 #include <assert.h>
9
10 #include "dcpu16.h"
11 #include "common.h"
12
13 /*
14 * quick and dirty assembler for dcpu16
15 *
16 * Justin Wind <justin.wind@gmail.com>
17 * 2012 04 07 - implementation started
18 * 2012 04 10 - functional
19 * 2012 04 16 - support dat statements
20 *
21 * TODO
22 * needs ability to specify location for code or data
23 * needs ability to specify label as relative to another label
24 * short labels not correctly computed
25 * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly
26 */
27
/* version-control keyword string, shown at the bottom of the full help screen */
static const char * const src_id_ = "$Id$";

/* name of the output file used when no -o option is given */
const char out_filename_default_[] = "a.out";

/* global invocation options */
struct options {
    unsigned int verbose;   /* bumped by each -v; levels above 2 enable the DEBUG_ macros */
    unsigned int dryrun;    /* non-zero when -d was given: assemble but do not write a file */
} opt_ = {
    .verbose = 0,
    .dryrun = 0,
};

/* DEBUG_PRINTF: printf prefixed with "DEBUG: ", only when verbosity is above 2 */
#define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0)
/* DEBUG_PRINTFQ: as above without the prefix, for continuing a line already started */
#define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0)
/* VERBOSE_PRINTF: printf at any non-zero verbosity */
#define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0)
44
45 static
46 void usage_(char *prog, unsigned int full) {
47 FILE *f = full ? stdout : stderr;
48 char *x = strrchr(prog, '/');
49
50 if (x && *(x + 1))
51 prog = x + 1;
52
53 if (full)
54 fprintf(f, "%s -- \n\n",
55 prog);
56
57 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
58 prog);
59
60 if (full) {
61 fprintf(f, "\nOptions:\n"
62 "\t-h -- this screen\n"
63 "\t-o <file> -- output to <file> [default: %s]\n"
64 "\t-s -- allow short labels in instruction words\n"
65 "\t-d -- dry run, print results, do not write to file\n"
66 "\t-v -- verbose output\n",
67 out_filename_default_);
68
69 fprintf(f, "\n%78s\n",
70 src_id_);
71 }
72 }
73
74
/* one operand of an instruction; an instruction's operands form a singly-linked list */
struct operand_ {
    struct operand_ *next;  /* following operand, NULL on the last one */
    char *operand;          /* tokenized operand text */
};
80
81 /* keep an array of instructions as we read them in */
82 struct instruction_ {
83 size_t src_line;
84 char *label; /* set if a label points here */
85 char *opcode; /* tokenized instruction text */
86 struct operand_ *operands; /* list of operands */
87 unsigned int ready : 1; /* bytecode computed? */
88 unsigned int length; /* number of words of bytecode */
89 DCPU16_WORD instr_words[];
90 };
91
92 /* keep an array of labels, indexed back to their instruction locations */
93 struct label_ {
94 char *label; /* name of label */
95 struct instruction_ **instr; /* pointer into array of instructions */
96 unsigned int ready : 1; /* do we know where this label is yet? */
97 DCPU16_WORD addr;
98 };
99
100
101 /* locate and return the label entry matching name */
102 static
103 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
104 size_t x;
105
106 for (x = 0; x < labels->entries; x++) {
107 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
108 if (strcmp(l->label, name) == 0)
109 return l;
110 }
111 return NULL;
112 }
113
114
115 /* if a label has a validly-calculated address, fetch it */
116 static
117 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
118 struct label_ *l;
119
120 if ( (l = label_find_(labels, name)) == NULL )
121 return -1;
122 if (! l->ready)
123 return -2;
124 *addr = l->addr;
125 return 0;
126 }
127
128
129 /* attempt to determine the addresses of all labels */
130 static
131 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
132 size_t i;
133
134 /* idea: label1:label2 - calculated as offset between labels */
135
136 /* for each label.. */
137 for (i = 0; i < labels->entries; i++) {
138 struct label_ *l;
139 struct instruction_ **instr;
140 unsigned int word_count = 0;
141
142 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
143
144 DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label);
145
146 #if 0
147 force full resolution while debugging
148 /* if it's already calculated, great. */
149 if (l->ready)
150 continue;
151 #endif
152
153 /*
154 * starting at the instruction for this label,
155 * walk backwards through the list of instructions
156 * until we get to the start or a known prior label address.
157 * update our label with the freshly calculated addr
158 */
159
160 /* first fetch the instruction associated with the label we want to know about.. */
161 /* the addr of this instruction will be whatever follows all the preceding instructions */
162 /* so back up one before counting instruction lengths... */
163 instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
164 /* is it the first one? */
165 if (instr == (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0))
166 break;
167
168 instr--;
169
170 while (instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) {
171 if ((*instr)->ready == 0)
172 DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode);
173 word_count += (*instr)->length;
174
175 DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode, (*instr)->length);
176
177 /* have we come across an instruction which a label points to?
178 it should already be calculated, so just add that on and be done */
179 if ((*instr)->label
180 && strcmp((*instr)->label, l->label)) {
181 DCPU16_WORD addr;
182
183 if (label_addr_(labels, (*instr)->label, &addr)) {
184 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
185 (*instr)->label,
186 l->label);
187 continue;
188 }
189
190 word_count += addr;
191 break;
192 }
193 instr--;
194 }
195 l->addr = word_count;
196 l->ready = 1;
197 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
198 }
199 }
200
201
/* opcode_bits_
 * look up the low four bits of the instruction word for a basic opcode mnemonic
 * the match is case-insensitive
 * JSR maps to 0x00, the value shared by all non-basic instructions
 * returns -1 (and complains on stderr) if the mnemonic is not known
 */
static
int opcode_bits_(char *opcode) {
    static const struct {
        char op[4];
        char value;
    } basic_opcodes[] = {
        { "JSR", 0x00 },
        /* { "future nbi instruction", 0x00 }, */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
    };
    size_t n;

    for (n = 0; n < sizeof basic_opcodes / sizeof basic_opcodes[0]; n++) {
        if (strcasecmp(basic_opcodes[n].op, opcode) == 0)
            return basic_opcodes[n].value;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
241
/* nbi_opcode_bits_
 * generate the six bits for a given nbi opcode (aka first operand to opcode 0x00)
 * the match is case-insensitive
 * returns -1 (and names the offending mnemonic on stderr) if it is not a known nbi opcode
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 }
    }, *o;

    /* the table is terminated by an entry with an empty name */
    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            break;
    }

    if (o->op[0] == '\0') {
        /* report what was asked for; the terminating entry's own name is always empty */
        fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
        return -1;
    }

    return o->value;
}
266
/* register_enumerate_
 * convert register character like 'x' to value like 0x03
 * accepts either case of A B C X Y Z I J, numbered 0 through 7 in that order
 * returns (unsigned int)-1 and complains on stderr for anything else,
 * including the NUL character, which strchr would otherwise match at the end of the table
 */
static inline
unsigned int register_enumerate_(char r) {
    static const char regs[] = "AaBbCcXxYyZzIiJj";
    const char *x = (r != '\0') ? strchr(regs, r) : NULL;

    /* upper and lower case sit next to each other, so halving the offset gives the register number */
    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned char)r);
    return (unsigned int)-1;
}
279
/* buf_strip_chars_
 * delete, in place, every character of buf which appears in chars
 * the surviving characters keep their order and the result stays NUL-terminated
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    char *dst = buf;
    const char *src;

    for (src = buf; *src != '\0'; src++) {
        if (strchr(chars, *src) == NULL)
            *dst++ = *src;
    }
    *dst = '\0';
}
295
296
297 /* value_bits_
298 * generate the six bits for a given operand string
299 * returns -1 if it could not parse the operand
300 * returns -2 if it could not parse the operand due to an unresolved label
301 * notes: nextword may be overwritten even if it's not used in final instruction
302 */
303 static
304 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
305 static char *operand = NULL;
306 static size_t operand_sz = 0;
307
308 unsigned long l;
309 char *o, *ep;
310
311 /*
312 Our operand working buffer shouldn't ever need to be too big,
313 but DAT might blow that assumption.
314 */
315 if (operand_sz <= strlen(operand_orig)) {
316 void *tmp_ptr;
317 size_t new_sz = strlen(operand_orig);
318
319 if (new_sz < 256)
320 new_sz = 256;
321 new_sz += 256;
322
323 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
324 tmp_ptr = realloc(operand, new_sz);
325 if (tmp_ptr == NULL) {
326 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
327 return -1;
328 }
329 operand = tmp_ptr;
330 operand_sz = new_sz;
331 }
332
333 o = strcpy(operand, operand_orig);
334
335 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
336
337 /* this is a very stupid parser */
338
339 /* first, let's trim all whitespace out of string at once to make parsing easier */
340 buf_strip_chars_(operand, " \t\n");
341
342 /* single character might match a register */
343 if (strlen(operand) == 1
344 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
345 DEBUG_PRINTFQ("is register %c\n", *operand);
346 return register_enumerate_(*operand);
347 }
348
349 /* easy matches */
350 if (strcasecmp(operand, "POP") == 0) {
351 DEBUG_PRINTFQ("is POP\n");
352 return 0x18;
353 }
354 if (strcasecmp(operand, "PUSH") == 0) {
355 DEBUG_PRINTFQ("is PUSH\n");
356 return 0x19;
357 }
358 if (strcasecmp(operand, "PEEK") == 0) {
359 DEBUG_PRINTFQ("is PEEK\n");
360 return 0x1a;
361 }
362 if (strcasecmp(operand, "SP") == 0) {
363 DEBUG_PRINTFQ("is register SP\n");
364 return 0x1b;
365 }
366 if (strcasecmp(operand, "PC") == 0) {
367 DEBUG_PRINTFQ("is register PC\n");
368 return 0x1c;
369 }
370 if (strcasecmp(operand, "O") == 0) {
371 DEBUG_PRINTFQ("is register O\n");
372 return 0x1d;
373 }
374
375 /* is the operand [bracketed]? */
376 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
377 /* eat the brackets */
378 operand[strlen(operand) - 1] = '\0';
379 operand++;
380
381 /* is it [register]? */
382 if (strlen(operand) == 1
383 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
384 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
385 return 0x08 | register_enumerate_(*operand);
386 }
387
388 /* is it [register+something]? */
389 if ( (ep = strchr(operand, '+')) ) {
390 char *reg;
391 char *constant;
392
393 DEBUG_PRINTFQ("is multipart.. ");
394
395 /* eat the plus */
396 *ep = '\0';
397 ep++;
398
399 /* figure out which one is which */
400 if (strlen(ep) == 1
401 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
402 reg = ep;
403 constant = operand;
404 } else if (strlen(operand) == 1
405 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
406 reg = operand;
407 constant = ep;
408 } else {
409 DEBUG_PRINTFQ("is unparsable\n");
410 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
411 return -1;
412 }
413
414 /* check if something is understandable as a value */
415 errno = 0;
416 l = strtoul(constant, &ep, 0);
417 if (errno == 0
418 && (*constant && (*ep == '\0')) ) {
419 /* string conversion went without issue */
420 /* validate it will fit in a word */
421 if (l > 0xffff) {
422 DEBUG_PRINTFQ("is out of range\n");
423 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
424 return -1;
425 }
426
427 /* seems fine */
428 *nextword = l & 0xffff;
429 *nextwordused += 1;
430 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
431 return 0x10 | register_enumerate_(*reg);
432 } else if (errno == ERANGE) {
433 #if 0
434 oh, right, labels fall through
435 DEBUG_PRINTFQ("is out of range\n");
436 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
437 return -1;
438 #endif
439 }
440
441 /* what? still here? assume it's a label, I guess */
442 /* try to populate nextword with label address */
443 if (label_addr_(labels, operand, nextword)) {
444 DEBUG_PRINTFQ("(deferred label resolution)\n");
445 *nextwordused += 1;
446 return -2;
447 }
448 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
449 *nextwordused += 1;
450 return 0x10 | register_enumerate_(*reg);
451 }
452
453 /* it must just be a dereferenced literal then */
454
455 errno = 0;
456 l = strtoul(operand, &ep, 0);
457 if (errno == 0
458 && (*operand && (*ep == '\0')) ) {
459 /* string conversion went without issue */
460 /* validate it will fit in a word */
461 if (l > 0xffff) {
462 DEBUG_PRINTFQ("is out of range\n");
463 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
464 return -1;
465 }
466
467 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
468 *nextword = l & 0xffff;
469 *nextwordused += 1;
470 return 0x1e;
471 } else if (errno) {
472 DEBUG_PRINTFQ("is out of range\n");
473 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
474 }
475
476 /* not a number? try a label */
477 if (label_addr_(labels, operand, nextword)) {
478 DEBUG_PRINTFQ("(deferred label resolution)\n");
479 *nextwordused += 1;
480 return -2;
481 }
482 DEBUG_PRINTFQ("is a dereferenced label\n");
483 *nextwordused += 1;
484 return 0x1e;
485 }
486
487 /* left with a literal or a label, then */
488
489 errno = 0;
490 l = strtoul(operand, &ep, 0);
491 if (errno == 0
492 || (*operand && (*ep == '\0')) ) {
493 if (l > 0xffff) {
494 DEBUG_PRINTFQ("is out of range\n");
495 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
496 return -1;
497 }
498
499 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
500 if (l < 0x20) {
501 return l + 0x20;
502 }
503
504 *nextword = l & 0xffff;
505 *nextwordused += 1;
506 return 0x1f;
507 }
508
509 /* try to populate nextword with label address */
510 if (label_addr_(labels, operand, nextword)) {
511 DEBUG_PRINTFQ("(deferred label resolution)\n");
512 /* assume non-small literal value */
513 *nextwordused += 1;
514 return -2;
515 }
516
517 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
518 if (*nextword < 0x20 && allow_short_labels) {
519 DEBUG_PRINTF("small value label win\n");
520 return (0x20 + *nextword) & 0x3f;
521 }
522
523 *nextwordused += 1;
524 return 0x1f;
525 }
526
527 /* prints an instruction's assembly */
528 static inline
529 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
530 struct operand_ *o;
531 int r;
532
533 if (with_label)
534 r = printf("%-16s ", i->label ? i->label : "");
535
536 r = printf("%3s", i->opcode ? i->opcode : "");
537
538 for (o = i->operands; o; o = o->next)
539 r += printf(" %s%s", o->operand, o->next ? "," : "");
540
541 return r;
542 }
543
544 /* tokenize_line_
545 * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
546 * [label] instruction [operand[,operand[,...]]]
547 * Does no validation of contents of any of these tokens, as of yet.
548 * does not clean up after itself if a malloc fails
549 */
550 static
551 int tokenize_line_(char *line, struct instruction_ **next_instr) {
552 const char const *whitespace = " \t\n";
553 const char const *quotes = "\"'`";
554 struct instruction_ *instr = NULL;
555 char *x, *st, *qt;
556 char *label, *opcode;
557 struct operand_ *operand_list = NULL;
558 struct operand_ **operand_tail = &operand_list;
559 size_t instr_words_needed = 0;
560
561 assert(line);
562 assert(next_instr);
563
564 *next_instr = NULL;
565
566 /* strip leading whitespace */
567 line += strspn(line, whitespace);
568 if (*line == '\0')
569 return 0;
570
571 /* set first bare ';' to '\0', thus isolating any comments */
572 /* here we only care about the side-effect of truncating the first separator character */
573 (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
574 /* we don't care if there was an unmatched quote at this point, let's see what happens */
575 if (*line == '\0')
576 return 0;
577
578 /* carve off the first token, determine if it is a label */
579 x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
580 if (x == NULL || *x == '\0')
581 return 0;
582 if (qt) {
583 /* labels could contain an unmatched quote character, I guess? */
584 qt = NULL;
585 }
586
587 /* we have something, try to make sense of what it is */
588
589 #ifdef NON_SPEC_LABELS
590 /* I want my labels like 'label:' */
591 if ( *(x + strlen(line) - 1) == ':' ) {
592 *(x + strlen(line) - 1) = '\0';
593 DEBUG_PRINTF("label: %s\n", x);
594
595 label = x;
596
597 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
598 } else {
599 label = NULL;
600 opcode = x;
601 }
602 #endif /* NON_SPEC_LABELS */
603
604 /* spec gives example of labels as ':label' */
605 if (*x == ':') {
606 *x = '\0';
607 x++;
608 label = x;
609 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
610 } else {
611 label = NULL;
612 opcode = x;
613 }
614 /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
615
616 if (opcode && *opcode) {
617 /* if we have an opcode, we'll need at least one word to compile instruction */
618 instr_words_needed++;
619
620 /* build a list of operands to hang off this instruction */
621 while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
622 struct operand_ *new_operand;
623 char *y;
624
625 /* trim whitespaces */
626 x += strspn(x, whitespace);
627
628 if (*x) {
629 for (y = x + strlen(x) - 1; *y; y--) {
630 if (strchr(whitespace, *y)) {
631 *y = '\0';
632 }
633 }
634 }
635 /* nothing left? */
636 if (*x == '\0') {
637 fprintf(stderr, "null operand encountered\n");
638 return -1;
639 }
640
641 DEBUG_PRINTF("tokenized operand '%s'\n", x);
642
643 new_operand = malloc(sizeof *new_operand);
644 if (new_operand == NULL) {
645 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
646 return -1;
647 }
648
649 new_operand->operand = strdup(x);
650 if (new_operand->operand == NULL) {
651 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
652 return -1;
653 }
654
655 new_operand->next = NULL;
656
657 if (strchr(quotes, x[0])) {
658 /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
659 instr_words_needed += strlen(x) - 1;
660 }
661 instr_words_needed++;
662
663 *operand_tail = new_operand;
664 operand_tail = &(*operand_tail)->next;
665 }
666 }
667
668 DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed);
669
670 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
671 if (instr == NULL) {
672 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
673 return -1;
674 }
675
676 if (label) {
677 instr->label = strdup(label);
678 if (instr->label == NULL) {
679 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
680 return -1;
681 }
682 } else {
683 label = NULL;
684 }
685
686 if (opcode) {
687 instr->opcode = strdup(opcode);
688 if (instr->opcode == NULL) {
689 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
690 return -1;
691 }
692 } else {
693 opcode = NULL;
694 }
695
696 instr->operands = operand_list;
697
698 *next_instr = instr;
699
700 return 0;
701 }
702
703 /* try to generate bytecode for an instruction */
704 /* returns -1 on unrecoverable error */
705 static
706 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
707 unsigned int nwu = 0; /* number of words used */
708 unsigned int incomplete = 0;
709 int bits;
710 struct operand_ *o = i->operands;
711
712 if (opt_.verbose > 2) {
713 printf("%s: assembling %p ", __func__, i);
714 instruction_print_(i, 1);
715 printf("(line %zu)\n", i->src_line);
716 }
717
718 #if 0
719 while debugging, always reassemble
720 if (i->ready) {
721 /* already assembled, nothing to do */
722 return 0;
723 }
724 #endif
725
726 if (i->opcode == NULL) {
727 assert(i->label);
728 assert(i->operands == NULL);
729 /* just a label, move along */
730 i->length = 0;
731 i->ready = 1;
732 return 0;
733 }
734
735 /* special case DAT */
736 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
737 DEBUG_PRINTF("processing DAT...\n");
738
739 i->length = 0;
740
741 for ( /* */ ; o; o = o->next) {
742 size_t j, dat_len;
743 char *x;
744 unsigned long l;
745
746 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
747
748 /* is this a string? */
749 /* does it start with a quote, and end with the same quote? */
750 if ( (x = strchr("\"'`", o->operand[0])) ) {
751 dat_len = strlen(o->operand) - 1;
752 if (o->operand[dat_len] == *x) {
753 /* it is a string */
754 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
755
756 for (j = 0, x = o->operand + 1;
757 j < dat_len - 1;
758 j++, x++) {
759 i->instr_words[i->length] = *x;
760 i->length++;
761 }
762 /* Note that strings in DAT do not include their zero-terminators */
763 /* specify as 'DAT "string", 0' */
764 }
765 continue;
766 }
767
768 /* is this a number? */
769 char *ep;
770 errno = 0;
771 l = strtoul(o->operand, &ep, 0);
772 if (errno == 0
773 && (*o->operand && (*ep == '\0')) ) {
774 /* conversion succeeded */
775 if (l > 0xffff) {
776 fprintf(stderr, "value '%lu' out of range\n", l);
777 return -1;
778 }
779 i->instr_words[i->length] = l;
780 i->length++;
781 continue;
782 }
783
784 /* otherwise assume it's a label, even if we don't know what it is */
785 if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
786 DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand);
787 incomplete = 1;
788 }
789 i->length++;
790 }
791
792 if (incomplete) {
793 DEBUG_PRINTF("pending label address\n");
794 } else {
795 i->ready = 1;
796 }
797
798 return 0;
799 } /* end of DAT */
800
801 /* start with opcode bits */
802 bits = opcode_bits_(i->opcode);
803 if (bits < 0) {
804 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
805 for (o = i->operands; o; o = o->next)
806 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
807 fprintf(stderr, "'\n");
808 return -1;
809 }
810 i->instr_words[0] |= 0x0f & bits;
811
812 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
813 if ((bits & 0x0f) == 0) {
814 bits = nbi_opcode_bits_(i->opcode);
815 if (bits < 0) {
816 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
817 exit(EX_SOFTWARE);
818 }
819 } else {
820 if (o == NULL) {
821 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
822 return -1;
823 }
824 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
825 if (bits == -1) {
826 fprintf(stderr, "couldn't assemble instruction\n");
827 return -1;
828 } else if (bits == -2) {
829 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
830 /* keep going, but don't finalize until we can calculate label address */
831 incomplete = 1;
832 bits = 0;
833 }
834 o = o->next;
835 }
836 i->instr_words[0] |= (bits & 0x3f) << 4;
837
838 if (o == NULL) {
839 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
840 return -1;
841 }
842
843 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
844 if (bits == -1) {
845 fprintf(stderr, "couldn't assemble instruction\n");
846 return -1;
847 } else if (bits == -2) {
848 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
849 /* keep going, but don't finalize until we can calculate label address */
850 incomplete = 1;
851 bits = 0;
852 }
853 o = o->next;
854 i->instr_words[0] |= (bits & 0x3f) << 10;
855
856 if (o != NULL) {
857 fprintf(stderr, "too many operands\n");
858 return -1;
859 }
860
861 /* counting labels as words, we now know at least the maximum instruction length */
862
863 i->length = nwu + 1;
864
865 DEBUG_PRINTF("instruction words: [%u]", i->length);
866 for (bits = 0; bits <= (int)nwu; bits++)
867 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
868
869 if (incomplete) {
870 DEBUG_PRINTFQ(" (preliminary)");
871 } else {
872 i->ready = 1;
873 }
874
875 DEBUG_PRINTFQ("\n");
876
877 return 0;
878 }
879
880 /* parse_stream_
881 * read lines from stream f
882 * break each line into parts, populate parts into structures
883 */
884 static
885 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
886 struct instruction_ *instr, **instr_list_entry;
887 unsigned int line = 0;
888 int retval = 0;
889 char buf[0x4000];
890
891 buf[sizeof buf - 1] = '\0';
892
893 while (fgets(buf, sizeof buf, f)) {
894 line++;
895
896 if (buf[sizeof buf - 1] != '\0') {
897 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
898 retval = -1;
899 break;
900 }
901
902 if (tokenize_line_(buf, &instr)) {
903 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
904 retval = -1;
905 break;
906 }
907
908 if (instr) {
909 instr->src_line = line;
910 /* add to list of instructions */
911 instr_list_entry = dynarray_add(instructionps, &instr);
912 if (instr_list_entry == NULL) {
913 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
914 break;
915 }
916
917 if (instr->label) {
918 struct label_ new_label = {
919 .label = instr->label,
920 .instr = instr_list_entry,
921 .ready = 0,
922 .addr = 0,
923 };
924 if (label_find_(labels, instr->label)) {
925 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
926 break;
927 }
928
929 if (dynarray_add(labels, &new_label) == NULL) {
930 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
931 break;
932 }
933 label_addr_calculate_(instructionps, labels);
934 }
935
936 if (instr_assemble_(labels, instr, allow_short_labels)) {
937 fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
938 break;
939 }
940 }
941 }
942 if (ferror(f)) {
943 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
944 return -1;
945 }
946 if (! feof(f)) {
947 fprintf(stderr, "parsing aborted\n");
948 return -1;
949 }
950
951 return retval;
952 }
953
954 /* assemble_check_
955 * make a full pass over instruction list to resolve labels
956 */
957 static
958 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
959 int retval = 0;
960 size_t x;
961
962 /* fixing short labels .... */
963 /* by here we have our list of instructions and their maximum instruction lengths */
964 /* and we have a list of addresses, based on those maximum lengths */
965 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
966 /* and reassemble.. */
967 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
968 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
969
970 /* try this? keep another list of locations a label address is used */
971 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
972
973 DEBUG_PRINTF(" final pass of assembler...\n");
974 for (x = 0; x < instructionps->entries; x++) {
975 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
976 retval = instr_assemble_(labels, *instrp, allow_short_labels);
977 if (retval) {
978 fprintf(stderr, "instruction %zu failed to assemble\n", x);
979 return retval;
980 }
981 if (! (*instrp)->ready) {
982 fprintf(stderr, "instruction not resolvable\n");
983 return -1;
984 }
985 }
986
987 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
988 for (x = 0; x < labels->entries; x++) {
989 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
990 if (! l->ready)
991 retval |= -1;
992 if (opt_.verbose) {
993 printf("%3s0x%04x %-32s ",
994 l->ready ? "" : "*",
995 l->addr,
996 l->label);
997 instruction_print_(*(l->instr), 0);
998 printf("\n");
999 }
1000 }
1001
1002 VERBOSE_PRINTF("\n");
1003
1004 if (retval)
1005 fprintf(stderr, "some labels could not be resolved\n");
1006
1007 return retval;
1008 }
1009
1010 /* output_
1011 * write assembled words to named file
1012 */
1013 static
1014 int output_(struct dynamic_array *instructionps, const char *filename) {
1015 FILE *of = NULL;
1016 struct instruction_ **instrp;
1017 size_t i, r, total_words = 0;
1018 size_t x;
1019
1020 if (! opt_.dryrun) {
1021 of = fopen(filename, "w");
1022 if (of == NULL) {
1023 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1024 return -1;
1025 }
1026 }
1027
1028 for (i = 0; i < instructionps->entries; i++) {
1029 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
1030
1031 if (opt_.verbose) {
1032 int s;
1033 s = instruction_print_(*instrp, 1);
1034 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
1035 for (x = 0; x < (*instrp)->length; x++) {
1036 printf(" %04x", (*instrp)->instr_words[x]);
1037 }
1038 printf("\n");
1039 }
1040
1041 if (of) {
1042 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
1043 if (r < (*instrp)->length) {
1044 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
1045 return -1;
1046 }
1047 }
1048 total_words += (*instrp)->length;
1049 }
1050
1051 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
1052 opt_.dryrun ? "assembled" : "wrote",
1053 i,
1054 total_words);
1055
1056 return 0;
1057 }
1058
1059 static struct dynamic_array *instructionps_;
1060 static struct dynamic_array *labels_;
1061
1062 int main(int argc, char *argv[]) {
1063 const char *out_filename = NULL;
1064 unsigned int allow_short_labels = 0;
1065 int c;
1066
1067 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
1068 switch (c) {
1069 case 'd':
1070 opt_.dryrun++;
1071 break;
1072
1073 case 's':
1074 allow_short_labels++;
1075 break;
1076
1077 case 'o':
1078 if (out_filename) {
1079 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1080 exit(EX_CANTCREAT);
1081 }
1082 out_filename = optarg;
1083 break;
1084
1085 case 'v':
1086 opt_.verbose++;
1087 break;
1088
1089 case 'h':
1090 usage_(argv[0], 1);
1091 exit(EX_OK);
1092
1093 default:
1094 usage_(argv[0], 0);
1095 exit(EX_USAGE);
1096 }
1097 }
1098
1099 argc -= optind;
1100 argv += optind;
1101
1102 if (out_filename == NULL)
1103 out_filename = out_filename_default_;
1104
1105 /* init tables */
1106 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1107 labels_ = dynarray_new(sizeof(struct label_), 256);
1108 if (instructionps_ == NULL
1109 || labels_ == NULL) {
1110 fprintf(stderr, "failed to initialize\n");
1111 exit(EX_OSERR);
1112 }
1113
1114 /* if filenames were specified, parse them instead of stdin */
1115 if (argc) {
1116 while (argc) {
1117 char *filename = *argv;
1118 FILE *f = fopen(filename, "r");
1119
1120 argc--, argv++;
1121
1122 if (f == NULL) {
1123 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1124 continue;
1125 }
1126
1127 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1128 c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1129 fclose(f);
1130 if (c)
1131 break;
1132 }
1133 } else {
1134 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1135 c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1136 }
1137 if (c) {
1138 fprintf(stderr, "could not parse input, aborting\n");
1139 exit(EX_DATAERR);
1140 }
1141
1142 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1143 fprintf(stderr, "errors prevented assembly\n");
1144 exit(EX_DATAERR);
1145 }
1146
1147 if (output_(instructionps_, out_filename)) {
1148 fprintf(stderr, "failed to create output\n");
1149 exit(EX_OSERR);
1150 }
1151
1152 exit(EX_OK);
1153 }