removed checks for setting literal operands
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <strings.h>
6 #include <errno.h>
7 #include <sysexits.h>
8 #include <assert.h>
9
10 #include "dcpu16.h"
11 #include "common.h"
12
13 /*
14 * quick and dirty assembler for dcpu16
15 *
16 * Justin Wind <justin.wind@gmail.com>
17 * 2012 04 07 - implementation started
18 * 2012 04 10 - functional
19 * 2012 04 16 - support dat statements
20 *
21 * TODO
22 * needs ability to specify location for code or data
23 * needs ability to specify label as relative to another label
24 * short labels not correctly computed
25 */
26
/* revision-control identifier, printed at the bottom of the full usage screen */
static const char * const src_id_ = "$Id$";

/* name of the output file used when no -o option is given */
const char out_filename_default_[] = "a.out";
30
31 /* global invocation options */
/* global invocation options; everything starts out disabled */
struct options {
    /* verbosity level; the DEBUG_* output macros fire when this exceeds 2 */
    unsigned int verbose;
    /* non-zero: assemble and report, but do not write an output file */
    unsigned int dryrun;
} opt_ = { 0, 0 };
39
/*
 * Diagnostic output helpers, all keyed off opt_.verbose.
 * Each one expands to a single statement, so it is safe in unbraced if/else.
 */

/* printed when verbosity exceeds 2, prefixed with "DEBUG: " */
#define DEBUG_PRINTF(...) \
    do { \
        if (opt_.verbose > 2) { \
            printf("DEBUG: "); \
            printf(__VA_ARGS__); \
        } \
    } while (0)

/* same threshold as DEBUG_PRINTF, but without the prefix (for continuing a line) */
#define DEBUG_PRINTFQ(...) \
    do { \
        if (opt_.verbose > 2) \
            printf(__VA_ARGS__); \
    } while (0)

/* printed at any non-zero verbosity */
#define VERBOSE_PRINTF(...) \
    do { \
        if (opt_.verbose) \
            printf(__VA_ARGS__); \
    } while (0)
43
44 static
45 void usage_(char *prog, unsigned int full) {
46 FILE *f = full ? stdout : stderr;
47 char *x = strrchr(prog, '/');
48
49 if (x && *(x + 1))
50 prog = x + 1;
51
52 if (full)
53 fprintf(f, "%s -- \n\n",
54 prog);
55
56 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
57 prog);
58
59 if (full) {
60 fprintf(f, "\nOptions:\n"
61 "\t-h -- this screen\n"
62 "\t-o <file> -- output to <file> [default: %s]\n"
63 "\t-s -- allow short labels in instruction words\n"
64 "\t-d -- dry run, print results, do not write to file\n"
65 "\t-v -- verbose output\n",
66 out_filename_default_);
67
68 fprintf(f, "\n%78s\n",
69 src_id_);
70 }
71 }
72
73
74 /* instructions have operands */
/* one operand of an instruction; operands form a singly-linked list */
struct operand_ {
    /* following operand, or NULL at the end of the list */
    struct operand_ *next;
    /* tokenized operand text */
    char *operand;
};
79
80 /* keep an array of instructions as we read them in */
81 struct instruction_ {
82 size_t src_line;
83 char *label; /* set if a label points here */
84 char *opcode; /* tokenized instruction text */
85 struct operand_ *operands; /* list of operands */
86 unsigned int ready : 1; /* bytecode computed? */
87 unsigned int length; /* number of words of bytecode */
88 DCPU16_WORD instr_words[];
89 };
90
91 /* keep an array of labels, indexed back to their instruction locations */
92 struct label_ {
93 char *label; /* name of label */
94 struct instruction_ **instr; /* pointer into array of instructions */
95 unsigned int ready : 1; /* do we know where this label is yet? */
96 DCPU16_WORD addr;
97 };
98
99
100 /* locate and return the label entry matching name */
101 static
102 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
103 size_t x;
104
105 for (x = 0; x < labels->entries; x++) {
106 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
107 if (strcmp(l->label, name) == 0)
108 return l;
109 }
110 return NULL;
111 }
112
113
114 /* if a label has a validly-calculated address, fetch it */
115 static
116 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
117 struct label_ *l;
118
119 if ( (l = label_find_(labels, name)) == NULL )
120 return -1;
121 if (! l->ready)
122 return -2;
123 *addr = l->addr;
124 return 0;
125 }
126
127
128 /* attempt to determine the addresses of all labels */
129 static
130 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
131 size_t i;
132
133 /* idea: label1:label2 - calculated as offset between labels */
134
135 /* for each label.. */
136 for (i = 0; i < labels->entries; i++) {
137 struct label_ *l;
138 struct instruction_ **instr;
139 unsigned int word_count = 0;
140
141 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
142
143 /* if it's already calculated, great. */
144 if (l->ready)
145 continue;
146
147 /*
148 * starting at the instruction for this label,
149 * walk backwards through the list of instructions
150 * until we get to the start or a known prior label address.
151 * update our label with the freshly calculated addr
152 */
153 for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
154 instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
155 instr--) {
156
157 if ((*instr)->ready)
158 DEBUG_PRINTF("%s: instr not ready\n", __func__);
159 word_count += (*instr)->length;
160
161 /* have we come across an instruction which a label points to?
162 it should already be calculated, so just add that on and be done */
163 if ((*instr)->label
164 && strcmp((*instr)->label, l->label)) {
165 DCPU16_WORD addr;
166
167 if (label_addr_(labels, (*instr)->label, &addr)) {
168 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
169 (*instr)->label,
170 l->label);
171 continue;
172 }
173
174 word_count += addr;
175 break;
176 }
177 }
178 l->addr = word_count;
179 l->ready = 1;
180 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
181 }
182 }
183
184
185 /* generate the nibble for a given basic opcode */
/* opcode_bits_
 *  generate the nibble for a given basic opcode
 *  the mnemonic is matched without regard to case
 *  returns the opcode value (JSR, a non-basic instruction, maps to 0x00)
 *  returns -1, after complaining on stderr, if the mnemonic is unknown
 */
static
int opcode_bits_(char *opcode) {
    static const struct {
        const char *mnemonic;
        int bits;
    } basic_opcodes[] = {
        { "JSR", 0x00 }, /* stands in for every non-basic instruction */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
    };
    size_t n;

    for (n = 0; n < sizeof basic_opcodes / sizeof basic_opcodes[0]; n++) {
        if (strcasecmp(basic_opcodes[n].mnemonic, opcode) == 0)
            return basic_opcodes[n].bits;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
224
225 /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */
/* nbi_opcode_bits_
 *  generate the six bits for a given nbi opcode (aka first operand to opcode 0x00)
 *  the mnemonic is matched without regard to case
 *  returns the opcode value
 *  returns -1, after complaining on stderr, if the mnemonic is unknown
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static const struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 } /* end of table */
    }, *o;

    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            return o->value;
    }

    /* report the text which failed to match, not the (empty) end-of-table entry */
    fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
    return -1;
}
249
250 /* convert register character like 'x' to value like 0x03 */
/* register_enumerate_
 *  convert register character like 'x' to value like 0x03
 *  either case of A, B, C, X, Y, Z, I, J is accepted, giving 0 through 7
 *  returns (unsigned int)-1, after complaining on stderr, for anything else
 */
static inline
unsigned int register_enumerate_(char r) {
    static const char regs[] = "AaBbCcXxYyZzIiJj";
    const char *x = NULL;

    /* strchr() would happily find '\0' at the terminator, which is no register */
    if (r != '\0')
        x = strchr(regs, r);

    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned int)(unsigned char)r);
    return (unsigned int)-1;
}
262
263 /* removes all occurences of chars from buf */
/* buf_strip_chars_
 *  removes, in place, every occurrence in buf of any character listed in chars
 *  the surviving characters keep their order and buf stays zero-terminated
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    char *rd = buf;
    char *wr = buf;

    while (*rd != '\0') {
        /* copy down only the characters which are not being stripped */
        if (strchr(chars, *rd) == NULL) {
            *wr = *rd;
            wr++;
        }
        rd++;
    }

    *wr = '\0';
}
278
279
280 /* value_bits_
281 * generate the six bits for a given operand string
282 * returns -1 if it could not parse the operand
283 * returns -2 if it could not parse the operand due to an unresolved label
284 * notes: nextword may be overwritten even if it's not used in final instruction
285 */
286 static
287 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
288 static char *operand = NULL;
289 static size_t operand_sz = 0;
290
291 unsigned long l;
292 char *o, *ep;
293
294 /*
295 Our operand working buffer shouldn't ever need to be too big,
296 but DAT might blow that assumption.
297 */
298 if (operand_sz <= strlen(operand_orig)) {
299 void *tmp_ptr;
300 size_t new_sz = strlen(operand_orig);
301
302 if (new_sz < 256)
303 new_sz = 256;
304 new_sz += 256;
305
306 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
307 tmp_ptr = realloc(operand, new_sz);
308 if (tmp_ptr == NULL) {
309 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
310 return -1;
311 }
312 operand = tmp_ptr;
313 operand_sz = new_sz;
314 }
315
316 o = strcpy(operand, operand_orig);
317
318 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
319
320 /* this is a very stupid parser */
321
322 /* first, let's trim all whitespace out of string at once to make parsing easier */
323 buf_strip_chars_(operand, " \t\n");
324
325 /* single character might match a register */
326 if (strlen(operand) == 1
327 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
328 DEBUG_PRINTFQ("is register %c\n", *operand);
329 return register_enumerate_(*operand);
330 }
331
332 /* easy matches */
333 if (strcasecmp(operand, "POP") == 0) {
334 DEBUG_PRINTFQ("is POP\n");
335 return 0x18;
336 }
337 if (strcasecmp(operand, "PUSH") == 0) {
338 DEBUG_PRINTFQ("is PUSH\n");
339 return 0x19;
340 }
341 if (strcasecmp(operand, "PEEK") == 0) {
342 DEBUG_PRINTFQ("is PEEK\n");
343 return 0x1a;
344 }
345 if (strcasecmp(operand, "SP") == 0) {
346 DEBUG_PRINTFQ("is register SP\n");
347 return 0x1b;
348 }
349 if (strcasecmp(operand, "PC") == 0) {
350 DEBUG_PRINTFQ("is register PC\n");
351 return 0x1c;
352 }
353 if (strcasecmp(operand, "O") == 0) {
354 DEBUG_PRINTFQ("is register O\n");
355 return 0x1d;
356 }
357
358 /* is the operand [bracketed]? */
359 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
360 /* eat the brackets */
361 operand[strlen(operand) - 1] = '\0';
362 operand++;
363
364 /* is it [register]? */
365 if (strlen(operand) == 1
366 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
367 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
368 return 0x08 | register_enumerate_(*operand);
369 }
370
371 /* is it [register+something]? */
372 if ( (ep = strchr(operand, '+')) ) {
373 char *reg;
374 char *constant;
375
376 /* eat the plus */
377 *ep = '\0';
378 ep++;
379
380 /* figure out which one is which */
381 if (strlen(ep) == 1
382 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
383 reg = ep;
384 constant = operand;
385 } else if (strlen(operand) == 1
386 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
387 reg = operand;
388 constant = ep;
389 } else {
390 DEBUG_PRINTFQ("is unparsable\n");
391 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
392 return -1;
393 }
394
395 /* check if something is understandable as a value */
396 errno = 0;
397 l = strtoul(constant, &ep, 0);
398 if (errno == 0
399 && (*constant && (*ep == '\0')) ) {
400 /* string conversion went without issue */
401 /* validate it will fit in a word */
402 if (l > 0xffff) {
403 DEBUG_PRINTFQ("is out of range\n");
404 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
405 return -1;
406 }
407
408 /* seems fine */
409 *nextword = l & 0xffff;
410 *nextwordused += 1;
411 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
412 return 0x10 | register_enumerate_(*reg);
413 } else if (errno) {
414 DEBUG_PRINTFQ("is out of range\n");
415 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
416 return -1;
417 }
418
419 /* what? still here? assume it's a label, I guess */
420 /* try to populate nextword with label address */
421 if (label_addr_(labels, operand, nextword)) {
422 DEBUG_PRINTFQ("(deferred label resolution)\n");
423 *nextwordused += 1;
424 return -2;
425 }
426 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
427 *nextwordused += 1;
428 return 0x10 | register_enumerate_(*reg);
429 }
430
431 /* it must just be a dereferenced literal then */
432
433 errno = 0;
434 l = strtoul(operand, &ep, 0);
435 if (errno == 0
436 && (*operand && (*ep == '\0')) ) {
437 /* string conversion went without issue */
438 /* validate it will fit in a word */
439 if (l > 0xffff) {
440 DEBUG_PRINTFQ("is out of range\n");
441 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
442 return -1;
443 }
444
445 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
446 *nextword = l & 0xffff;
447 *nextwordused += 1;
448 return 0x1e;
449 } else if (errno) {
450 DEBUG_PRINTFQ("is out of range\n");
451 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
452 }
453
454 /* not a number? try a label */
455 if (label_addr_(labels, operand, nextword)) {
456 DEBUG_PRINTFQ("(deferred label resolution)\n");
457 *nextwordused += 1;
458 return -2;
459 }
460 DEBUG_PRINTFQ("is a dereferenced label\n");
461 *nextwordused += 1;
462 return 0x1e;
463 }
464
465 /* left with a literal or a label, then */
466
467 errno = 0;
468 l = strtoul(operand, &ep, 0);
469 if (errno == 0
470 || (*operand && (*ep == '\0')) ) {
471 if (l > 0xffff) {
472 DEBUG_PRINTFQ("is out of range\n");
473 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
474 return -1;
475 }
476
477 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
478 if (l < 0x20) {
479 return l + 0x20;
480 }
481
482 *nextword = l & 0xffff;
483 *nextwordused += 1;
484 return 0x1f;
485 }
486
487 /* try to populate nextword with label address */
488 if (label_addr_(labels, operand, nextword)) {
489 DEBUG_PRINTFQ("(deferred label resolution)\n");
490 /* assume non-small literal value */
491 *nextwordused += 1;
492 return -2;
493 }
494
495 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
496 if (*nextword < 0x20 && allow_short_labels) {
497 DEBUG_PRINTF("small value label win\n");
498 return (0x20 + *nextword) & 0x3f;
499 }
500
501 *nextwordused += 1;
502 return 0x1f;
503 }
504
505 /* prints an instruction's assembly */
506 static inline
507 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
508 struct operand_ *o;
509 int r;
510
511 if (with_label)
512 r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
513 else
514 r = printf("%3s", i->opcode);
515
516 for (o = i->operands; o; o = o->next)
517 r += printf(" %s%s", o->operand, o->next ? "," : "");
518
519 return r;
520 }
521
522 /* tokenize_line_
523 * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
524 * [label] instruction [operand[,operand[,...]]]
525 * Does no validation of contents of any of these tokens, as of yet.
526 */
527 static
528 int tokenize_line_(char *line, struct instruction_ **next_instr) {
529 const char const *whitespace = " \t\n";
530 const char const *quotes = "\"'`";
531 struct instruction_ *instr = NULL;
532 char *x, *st, *qt;
533 char *label, *opcode;
534 struct operand_ *operand_list = NULL;
535 struct operand_ **operand_tail = &operand_list;
536 size_t instr_words_needed = 0;
537
538 assert(line);
539 assert(next_instr);
540
541 *next_instr = NULL;
542
543 /* strip leading whitespace */
544 line += strspn(line, whitespace);
545 if (*line == '\0')
546 return 0;
547
548 /* set first bare ';' to '\0', thus isolating any comments */
549 /* here we only care about the side-effect of truncating the first separator character */
550 (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
551 /* we don't care if there was an unmatched quote at this point, let's see what happens */
552 if (*line == '\0')
553 return 0;
554
555 /* carve off the first token, determine if it is a label */
556 x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
557 if (x == NULL || *x == '\0')
558 return 0;
559 if (qt) {
560 /* labels could contain an unmatched quote character, I guess? */
561 qt = NULL;
562 }
563
564 /* we have something, try to make sense of what it is */
565
566 #ifdef NON_SPEC_LABELS
567 /* I want my labels like 'label:' */
568 if ( *(x + strlen(line) - 1) == ':' ) {
569 *(x + strlen(line) - 1) = '\0';
570 DEBUG_PRINTF("label: %s\n", x);
571
572 label = x;
573
574 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
575 } else {
576 label = NULL;
577 opcode = x;
578 }
579 #endif /* NON_SPEC_LABELS */
580
581 /* spec gives example of labels as ':label' */
582 if (*x == ':') {
583 *x = '\0';
584 x++;
585 label = x;
586 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
587 } else {
588 label = NULL;
589 opcode = x;
590 }
591 /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
592
593 if (opcode && *opcode) {
594 /* if we have an opcode, we'll need at least one word to compile instruction */
595 instr_words_needed++;
596
597 while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
598 struct operand_ *new_operand;
599 char *y;
600
601 /* trim whitespaces */
602 x += strspn(x, whitespace);
603
604 if (*x) {
605 for (y = x + strlen(x) - 1; *y; y--) {
606 if (strchr(whitespace, *y)) {
607 *y = '\0';
608 }
609 }
610 }
611 /* nothing left? */
612 if (*x == '\0') {
613 fprintf(stderr, "null operand encountered\n");
614 return -1;
615 }
616
617 DEBUG_PRINTF("tokenized operand '%s'\n", x);
618
619 new_operand = malloc(sizeof *new_operand);
620 if (new_operand == NULL) {
621 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
622 return -1;
623 }
624
625 new_operand->operand = strdup(x);
626 if (new_operand->operand == NULL) {
627 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
628 return -1;
629 }
630
631 new_operand->next = NULL;
632
633 if (strchr(quotes, x[0])) {
634 /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
635 instr_words_needed += strlen(x) - 1;
636 }
637 instr_words_needed++;
638
639 *operand_tail = new_operand;
640 operand_tail = &(*operand_tail)->next;
641 }
642 }
643
644 DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed);
645
646 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
647 if (instr == NULL) {
648 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
649 return -1;
650 }
651
652 if (label) {
653 instr->label = strdup(label);
654 if (instr->label == NULL) {
655 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
656 return -1;
657 }
658 } else {
659 label = NULL;
660 }
661
662 if (opcode) {
663 instr->opcode = strdup(opcode);
664 if (instr->opcode == NULL) {
665 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
666 return -1;
667 }
668 } else {
669 opcode = NULL;
670 }
671
672 instr->operands = operand_list;
673
674 *next_instr = instr;
675
676 return 0;
677 }
678
679 /* try to generate bytecode for an instruction */
680 /* returns -1 on unrecoverable error */
681 static
682 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
683 unsigned int nwu = 0; /* number of words used */
684 unsigned int incomplete = 0;
685 int bits;
686 struct operand_ *o = i->operands;
687
688 if (opt_.verbose > 2) {
689 printf("%s: assembling %p ", __func__, i);
690 instruction_print_(i, 1);
691 printf("(line :%zu)\n", i->src_line);
692 }
693
694 if (i->ready) {
695 /* already assembled, nothing to do */
696 return 0;
697 }
698
699 /* special case DAT */
700 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
701 DEBUG_PRINTF("processing DAT...\n");
702
703 i->length = 0;
704
705 for ( /* */ ; o; o = o->next) {
706 size_t j, dat_len;
707 char *x;
708 unsigned long l;
709
710 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
711
712 /* is this a string? */
713 if ( (x = strchr("\"'`", o->operand[0])) ) {
714 dat_len = strlen(o->operand) - 1;
715 if (o->operand[dat_len] == *x) {
716 /* it is a string */
717 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
718
719 for (j = 0, x = o->operand + 1;
720 j < dat_len - 1;
721 j++, x++) {
722 i->instr_words[i->length] = *x;
723 i->length++;
724 }
725 /* Note that strings in DAT do not include their zero-terminators */
726 /* specify as 'DAT "string", 0' */
727 }
728 continue;
729 }
730
731 /* is this a number? */
732 char *ep;
733 errno = 0;
734 l = strtoul(o->operand, &ep, 0);
735 if (errno == 0
736 && (*o->operand && (*ep == '\0')) ) {
737 /* conversion succeeded */
738 if (l > 0xffff) {
739 fprintf(stderr, "value '%lu' out of range\n", l);
740 return -1;
741 }
742 i->instr_words[i->length] = l;
743 i->length++;
744 continue;
745 }
746
747 /* otherwise assume it's a label, even if we don't know what it is */
748 if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
749 DEBUG_PRINTF("(deferred label resolution)\n");
750 incomplete = 1;
751 }
752 i->length++;
753 }
754
755 if (incomplete) {
756 DEBUG_PRINTF("pending label address\n");
757 } else {
758 i->ready = 1;
759 }
760
761 return 0;
762 } /* end of DAT */
763
764 /* start with opcode bits */
765 bits = opcode_bits_(i->opcode);
766 if (bits < 0) {
767 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
768 for (o = i->operands; o; o = o->next)
769 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
770 fprintf(stderr, "'\n");
771 return -1;
772 }
773 i->instr_words[0] |= 0x0f & bits;
774
775 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
776 if ((bits & 0x0f) == 0) {
777 bits = nbi_opcode_bits_(i->opcode);
778 if (bits < 0) {
779 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
780 exit(EX_SOFTWARE);
781 }
782 } else {
783 if (o == NULL) {
784 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
785 return -1;
786 }
787 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
788 if (bits == -1) {
789 fprintf(stderr, "couldn't assemble instruction\n");
790 return -1;
791 } else if (bits == -2) {
792 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
793 /* keep going, but don't finalize until we can calculate label address */
794 incomplete = 1;
795 bits = 0;
796 }
797 o = o->next;
798 }
799 i->instr_words[0] |= (bits & 0x3f) << 4;
800
801 if (o == NULL) {
802 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
803 return -1;
804 }
805
806 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
807 if (bits == -1) {
808 fprintf(stderr, "couldn't assemble instruction\n");
809 return -1;
810 } else if (bits == -2) {
811 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
812 /* keep going, but don't finalize until we can calculate label address */
813 incomplete = 1;
814 bits = 0;
815 }
816 o = o->next;
817 i->instr_words[0] |= (bits & 0x3f) << 10;
818
819 if (o != NULL) {
820 fprintf(stderr, "too many operands\n");
821 return -1;
822 }
823
824 /* counting labels as words, we now know at least the maximum instruction length */
825
826 i->length = nwu + 1;
827
828 DEBUG_PRINTF("instruction words: [%u]", i->length);
829 for (bits = 0; bits <= (int)nwu; bits++)
830 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
831
832 if (incomplete) {
833 DEBUG_PRINTFQ(" (preliminary)");
834 } else {
835 i->ready = 1;
836 }
837
838 DEBUG_PRINTFQ("\n");
839
840 return 0;
841 }
842
843 /* parse_stream_
844 * read lines from stream f
845 * break each line into parts, populate parts into structures
846 */
847 static
848 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
849 struct instruction_ *instr, **instr_list_entry;
850 unsigned int line = 0;
851 int retval = 0;
852 char buf[0x4000];
853
854 buf[sizeof buf - 1] = '\0';
855
856 while (fgets(buf, sizeof buf, f)) {
857 line++;
858
859 if (buf[sizeof buf - 1] != '\0') {
860 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
861 retval = -1;
862 break;
863 }
864
865 if (tokenize_line_(buf, &instr)) {
866 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
867 retval = -1;
868 break;
869 }
870
871 if (instr) {
872 instr->src_line = line;
873 /* add to list of instructions */
874 instr_list_entry = dynarray_add(instructionps, &instr);
875 if (instr_list_entry == NULL) {
876 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
877 break;
878 }
879
880 if (instr->label) {
881 struct label_ new_label = {
882 .label = instr->label,
883 .instr = instr_list_entry,
884 .ready = 0,
885 .addr = 0,
886 };
887 if (label_find_(labels, instr->label)) {
888 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
889 break;
890 }
891
892 if (dynarray_add(labels, &new_label) == NULL) {
893 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
894 break;
895 }
896 label_addr_calculate_(instructionps, labels);
897 }
898
899 if (instr_assemble_(labels, instr, allow_short_labels)) {
900 fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
901 break;
902 }
903 }
904 }
905 if (ferror(f)) {
906 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
907 return -1;
908 }
909 if (! feof(f)) {
910 fprintf(stderr, "parsing aborted\n");
911 return -1;
912 }
913
914 return retval;
915 }
916
917 /* assemble_check_
918 * make a full pass over instruction list to resolve labels
919 */
920 static
921 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
922 int retval = 0;
923 size_t x;
924
925 /* fixing short labels .... */
926 /* by here we have our list of instructions and their maximum instruction lengths */
927 /* and we have a list of addresses, based on those maximum lengths */
928 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
929 /* and reassemble.. */
930 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
931 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
932
933 /* try this? keep another list of locations a label address is used */
934 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
935
936 DEBUG_PRINTF(" final pass of assembler...\n");
937 for (x = 0; x < instructionps->entries; x++) {
938 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
939 retval = instr_assemble_(labels, *instrp, allow_short_labels);
940 if (retval) {
941 fprintf(stderr, "instruction %zu failed to assemble\n", x);
942 return retval;
943 }
944 if (! (*instrp)->ready) {
945 fprintf(stderr, "instruction not resolvable\n");
946 return -1;
947 }
948 }
949
950 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
951 for (x = 0; x < labels->entries; x++) {
952 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
953 if (! l->ready)
954 retval |= -1;
955 if (opt_.verbose) {
956 printf("%3s0x%04x %-32s ",
957 l->ready ? "" : "*",
958 l->addr,
959 l->label);
960 instruction_print_(*(l->instr), 0);
961 printf("\n");
962 }
963 }
964
965 VERBOSE_PRINTF("\n");
966
967 if (retval)
968 fprintf(stderr, "some labels could not be resolved\n");
969
970 return retval;
971 }
972
973 /* output_
974 * write assembled words to named file
975 */
976 static
977 int output_(struct dynamic_array *instructionps, const char *filename) {
978 FILE *of = NULL;
979 struct instruction_ **instrp;
980 size_t i, r, total_words = 0;
981 size_t x;
982
983 if (! opt_.dryrun) {
984 of = fopen(filename, "w");
985 if (of == NULL) {
986 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
987 return -1;
988 }
989 }
990
991 for (i = 0; i < instructionps->entries; i++) {
992 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
993
994 if (opt_.verbose) {
995 int s;
996 s = instruction_print_(*instrp, 1);
997 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
998 for (x = 0; x < (*instrp)->length; x++) {
999 printf(" %04x", (*instrp)->instr_words[x]);
1000 }
1001 printf("\n");
1002 }
1003
1004 if (of) {
1005 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
1006 if (r < (*instrp)->length) {
1007 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
1008 return -1;
1009 }
1010 }
1011 total_words += (*instrp)->length;
1012 }
1013
1014 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
1015 opt_.dryrun ? "assembled" : "wrote",
1016 i,
1017 total_words);
1018
1019 return 0;
1020 }
1021
1022 static struct dynamic_array *instructionps_;
1023 static struct dynamic_array *labels_;
1024
1025 int main(int argc, char *argv[]) {
1026 const char *out_filename = NULL;
1027 unsigned int allow_short_labels = 0;
1028 int c;
1029
1030 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
1031 switch (c) {
1032 case 'd':
1033 opt_.dryrun++;
1034 break;
1035
1036 case 's':
1037 allow_short_labels++;
1038 break;
1039
1040 case 'o':
1041 if (out_filename) {
1042 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1043 exit(EX_CANTCREAT);
1044 }
1045 out_filename = optarg;
1046 break;
1047
1048 case 'v':
1049 opt_.verbose++;
1050 break;
1051
1052 case 'h':
1053 usage_(argv[0], 1);
1054 exit(EX_OK);
1055
1056 default:
1057 usage_(argv[0], 0);
1058 exit(EX_USAGE);
1059 }
1060 }
1061
1062 argc -= optind;
1063 argv += optind;
1064
1065 if (out_filename == NULL)
1066 out_filename = out_filename_default_;
1067
1068 /* init tables */
1069 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1070 labels_ = dynarray_new(sizeof(struct label_), 256);
1071 if (instructionps_ == NULL
1072 || labels_ == NULL) {
1073 fprintf(stderr, "failed to initialize\n");
1074 exit(EX_OSERR);
1075 }
1076
1077 /* if filenames were specified, parse them instead of stdin */
1078 if (argc) {
1079 while (argc) {
1080 char *filename = *argv;
1081 FILE *f = fopen(filename, "r");
1082
1083 argc--, argv++;
1084
1085 if (f == NULL) {
1086 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1087 continue;
1088 }
1089
1090 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1091 c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1092 fclose(f);
1093 if (c)
1094 break;
1095 }
1096 } else {
1097 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1098 c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1099 }
1100 if (c) {
1101 fprintf(stderr, "could not parse input, aborting\n");
1102 exit(EX_DATAERR);
1103 }
1104
1105 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1106 fprintf(stderr, "errors prevented assembly\n");
1107 exit(EX_DATAERR);
1108 }
1109
1110 if (output_(instructionps_, out_filename)) {
1111 fprintf(stderr, "failed to create output\n");
1112 exit(EX_OSERR);
1113 }
1114
1115 exit(EX_OK);
1116 }