began support for DAT assembler directive
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <errno.h>
6 #include <sysexits.h>
7 #include <assert.h>
8
9 #include "dcpu16.h"
10 #include "common.h"
11
12 /*
13 * quick and dirty assembler for dcpu16
14 *
15 * Justin Wind <justin.wind@gmail.com>
16 * 2012 04 07 - implementation started
17 * 2012 04 10 - functional
18 * 2012 04 16 - support dat statements
19 *
20 * TODO
21 * needs ability to specify location for code or data
22 * short labels not correctly computed
23 */
24
/* revision-control identification string; printed by the full usage screen */
static const char * const src_id_ = "$Id$";

/* name of the output file when none is chosen with -o */
const char out_filename_default_[] = "a.out";
28
/*
 * Global invocation options.
 * Both fields are counters: main() bumps them once per matching flag.
 */
struct options {
    unsigned int verbose;   /* -v: diagnostic output level */
    unsigned int dryrun;    /* -d: non-zero means do not write the output file */
};

struct options opt_ = { .verbose = 0, .dryrun = 0 };
37
/*
 * Diagnostic printf wrappers, all gated on opt_.verbose:
 *   DEBUG_PRINTF   -- verbosity above 2, output prefixed with "DEBUG: "
 *   DEBUG_PRINTFQ  -- verbosity above 2, no prefix (continues a DEBUG line)
 *   VERBOSE_PRINTF -- any non-zero verbosity
 * Arguments are only evaluated when the message is actually printed.
 */
#define DEBUG_PRINTF(...) \
    do { \
        if (opt_.verbose > 2) { \
            printf("DEBUG: "); \
            printf(__VA_ARGS__); \
        } \
    } while (0)

#define DEBUG_PRINTFQ(...) \
    do { \
        if (opt_.verbose > 2) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)

#define VERBOSE_PRINTF(...) \
    do { \
        if (opt_.verbose) { \
            printf(__VA_ARGS__); \
        } \
    } while (0)
41
42 static
43 void usage_(char *prog, unsigned int full) {
44 FILE *f = full ? stdout : stderr;
45 char *x = strrchr(prog, '/');
46
47 if (x && *(x + 1))
48 prog = x + 1;
49
50 if (full)
51 fprintf(f, "%s -- \n\n",
52 prog);
53
54 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
55 prog);
56
57 if (full) {
58 fprintf(f, "\nOptions:\n"
59 "\t-h -- this screen\n"
60 "\t-o <file> -- output to <file> [default: %s]\n"
61 "\t-s -- allow short labels in instruction words\n"
62 "\t-d -- dry run, print results, do not write to file\n"
63 "\t-v -- verbose output\n",
64 out_filename_default_);
65
66 fprintf(f, "\n%78s\n",
67 src_id_);
68 }
69 }
70
71
/*
 * One operand of an instruction.
 * Operands are chained into a singly-linked list, in source order.
 */
struct operand_ {
    /* following operand, NULL on the last one */
    struct operand_ *next;
    /* tokenized operand text */
    char *operand;
};
77
78 /* keep an array of instructions as we read them in */
79 struct instruction_ {
80 char *label; /* set if a label points here */
81 char *opcode; /* tokenized instruction text */
82 struct operand_ *operands; /* list of operands */
83 unsigned int ready : 1; /* bytecode computed? */
84 unsigned int length; /* number of words of bytecode */
85 DCPU16_WORD instr_words[];
86 };
87
88 /* keep an array of labels, indexed back to their instruction locations */
89 struct label_ {
90 char *label; /* name of label */
91 struct instruction_ **instr; /* pointer into array of instructions */
92 unsigned int ready : 1; /* do we know where this label is yet? */
93 DCPU16_WORD addr;
94 };
95
96
97 /* locate and return the label entry matching name */
98 static
99 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
100 size_t x;
101
102 for (x = 0; x < labels->entries; x++) {
103 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
104 if (strcmp(l->label, name) == 0)
105 return l;
106 }
107 return NULL;
108 }
109
110
111 /* if a label has a validly-calculated address, fetch it */
112 static
113 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
114 struct label_ *l;
115
116 if ( (l = label_find_(labels, name)) == NULL )
117 return -1;
118 if (! l->ready)
119 return -2;
120 *addr = l->addr;
121 return 0;
122 }
123
124
125 /* attempt to determine the addresses of all labels */
126 static
127 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
128 size_t i;
129
130 /* for each label.. */
131 for (i = 0; i < labels->entries; i++) {
132 struct label_ *l;
133 struct instruction_ **instr;
134 unsigned int word_count = 0;
135
136 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
137
138 /* if it's already calculated, great. */
139 if (l->ready)
140 continue;
141
142 /*
143 * starting at the instruction for this label,
144 * walk backwards through the list of instructions
145 * until we get to the start or a known prior label address.
146 * update our label with the freshly calculated addr
147 */
148 for (instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
149 instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0);
150 instr--) {
151
152 if ((*instr)->ready)
153 DEBUG_PRINTF("%s: instr not ready\n", __func__);
154 word_count += (*instr)->length;
155
156 /* have we come across an instruction which a label points to?
157 it should already be calculated, so just add that on and be done */
158 if ((*instr)->label
159 && strcmp((*instr)->label, l->label)) {
160 DCPU16_WORD addr;
161
162 if (label_addr_(labels, (*instr)->label, &addr)) {
163 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
164 (*instr)->label,
165 l->label);
166 continue;
167 }
168
169 word_count += addr;
170 break;
171 }
172 }
173 l->addr = word_count;
174 l->ready = 1;
175 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
176 }
177 }
178
179
/*
 * opcode_bits_
 * Look up the four-bit code for a basic opcode mnemonic (case-insensitive).
 * JSR maps to 0x00, the code shared by all non-basic instructions.
 * returns the code, or -1 (after complaining on stderr) if 'opcode' is unknown
 */
static
int opcode_bits_(char *opcode) {
    static const struct {
        char mnemonic[4];
        char nibble;
    } basic_ops[] = {
        { "JSR", 0x00 },
        /* { "future nbi instruction", 0x00 }, */
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "DIV", 0x05 },
        { "MOD", 0x06 },
        { "SHL", 0x07 },
        { "SHR", 0x08 },
        { "AND", 0x09 },
        { "BOR", 0x0a },
        { "XOR", 0x0b },
        { "IFE", 0x0c },
        { "IFN", 0x0d },
        { "IFG", 0x0e },
        { "IFB", 0x0f },
    };
    size_t n;

    for (n = 0; n < sizeof basic_ops / sizeof basic_ops[0]; n++) {
        if (strcasecmp(basic_ops[n].mnemonic, opcode) == 0)
            return basic_ops[n].nibble;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
219
/*
 * nbi_opcode_bits_
 * Look up the six-bit code for a non-basic instruction mnemonic
 * (case-insensitive).  In the rendered instruction word these bits occupy
 * the position of the first operand of basic opcode 0x00.
 * returns the code, or -1 (after complaining on stderr) if 'nbi_opcode' is unknown
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "", 0x00 }
    }, *o;

    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            break;
    }

    if (o->op[0] == '\0') {
        /* report what the caller asked for; 'o' is the empty end-of-table marker here */
        fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
        return -1;
    }

    return o->value;
}
244
/*
 * register_enumerate_
 * Convert a register character like 'x' to its value like 0x03.
 * Either case is accepted: A=0 B=1 C=2 X=3 Y=4 Z=5 I=6 J=7.
 * returns the register value, or (unsigned int)-1 after complaining on
 * stderr if 'r' does not name a register
 */
static inline
unsigned int register_enumerate_(char r) {
    const char regs[] = "AaBbCcXxYyZzIiJj";
    const char *x;

    /* strchr() would match the terminator of regs for '\0'; that is not a register */
    x = (r != '\0') ? strchr(regs, r) : NULL;
    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned char)r);
    return -1;
}
257
/*
 * buf_strip_chars_
 * Remove, in place, every occurrence in 'buf' of any character listed in
 * 'chars'.  The remaining characters keep their order and the result is
 * zero-terminated.
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    char *rd = buf;
    char *wr = buf;

    while (*rd != '\0') {
        /* copy down only the characters which are not being stripped */
        if (strchr(chars, *rd) == NULL) {
            *wr = *rd;
            wr++;
        }
        rd++;
    }

    *wr = '\0';
}
273
274
275 /* value_bits_
276 * generate the six bits for a given operand string
277 * returns -1 if it could not parse the operand
278 * returns -2 if it could not parse the operand due to an unresolved label
279 * notes: nextword may be overwritten even if it's not used in final instruction
280 */
281 static
282 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
283 static char *operand = NULL;
284 static size_t operand_sz = 0;
285
286 unsigned long l;
287 char *o, *ep;
288
289 /*
290 Our operand working buffer shouldn't ever need to be too big,
291 but DAT might blow that assumption.
292 */
293 if (operand_sz <= strlen(operand_orig)) {
294 void *tmp_ptr;
295 size_t new_sz = strlen(operand_orig);
296
297 if (new_sz < 256)
298 new_sz = 256;
299 new_sz += 256;
300
301 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
302 tmp_ptr = realloc(operand, new_sz);
303 if (tmp_ptr == NULL) {
304 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
305 return -1;
306 }
307 operand = tmp_ptr;
308 operand_sz = new_sz;
309 }
310
311 o = strcpy(operand, operand_orig);
312
313 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
314
315 /* this is a very stupid parser */
316
317 /* first, let's trim all whitespace out of string at once to make parsing easier */
318 buf_strip_chars_(operand, " \t\n");
319
320 /* single character might match a register */
321 if (strlen(operand) == 1
322 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
323 DEBUG_PRINTFQ("is register %c\n", *operand);
324 return register_enumerate_(*operand);
325 }
326
327 /* easy matches */
328 if (strcasecmp(operand, "POP") == 0) {
329 DEBUG_PRINTFQ("is POP\n");
330 return 0x18;
331 }
332 if (strcasecmp(operand, "PUSH") == 0) {
333 DEBUG_PRINTFQ("is PUSH\n");
334 return 0x19;
335 }
336 if (strcasecmp(operand, "PEEK") == 0) {
337 DEBUG_PRINTFQ("is PEEK\n");
338 return 0x1a;
339 }
340 if (strcasecmp(operand, "SP") == 0) {
341 DEBUG_PRINTFQ("is register SP\n");
342 return 0x1b;
343 }
344 if (strcasecmp(operand, "PC") == 0) {
345 DEBUG_PRINTFQ("is register PC\n");
346 return 0x1c;
347 }
348 if (strcasecmp(operand, "O") == 0) {
349 DEBUG_PRINTFQ("is register O\n");
350 return 0x1d;
351 }
352
353 /* is the operand [bracketed]? */
354 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
355 /* eat the brackets */
356 operand[strlen(operand) - 1] = '\0';
357 operand++;
358
359 /* is it [register]? */
360 if (strlen(operand) == 1
361 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
362 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
363 return 0x08 | register_enumerate_(*operand);
364 }
365
366 /* is it [register+something]? */
367 if ( (ep = strchr(operand, '+')) ) {
368 char *reg;
369 char *constant;
370
371 /* eat the plus */
372 *ep = '\0';
373 ep++;
374
375 /* figure out which one is which */
376 if (strlen(ep) == 1
377 && strchr("AaBbCcXxYyZzIiJj", *ep)) {
378 reg = ep;
379 constant = operand;
380 } else if (strlen(operand) == 1
381 && strchr("AaBbCcXxYyZzIiJj", *operand) ) {
382 reg = operand;
383 constant = ep;
384 } else {
385 DEBUG_PRINTFQ("is unparsable\n");
386 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
387 return -1;
388 }
389
390 /* check if something is understandable as a value */
391 errno = 0;
392 l = strtoul(constant, &ep, 0);
393 if (errno == 0
394 && (*constant && (*ep == '\0')) ) {
395 /* string conversion went without issue */
396 /* validate it will fit in a word */
397 if (l > 0xffff) {
398 DEBUG_PRINTFQ("is out of range\n");
399 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
400 return -1;
401 }
402
403 /* seems fine */
404 *nextword = l & 0xffff;
405 *nextwordused += 1;
406 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
407 return 0x10 | register_enumerate_(*reg);
408 } else if (errno) {
409 DEBUG_PRINTFQ("is out of range\n");
410 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
411 return -1;
412 }
413
414 /* what? still here? assume it's a label, I guess */
415 /* try to populate nextword with label address */
416 if (label_addr_(labels, operand, nextword)) {
417 DEBUG_PRINTFQ("(deferred label resolution)\n");
418 *nextwordused += 1;
419 return -2;
420 }
421 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
422 *nextwordused += 1;
423 return 0x10 | register_enumerate_(*reg);
424 }
425
426 /* it must just be a dereferenced literal then */
427
428 errno = 0;
429 l = strtoul(operand, &ep, 0);
430 if (errno == 0
431 && (*operand && (*ep == '\0')) ) {
432 /* string conversion went without issue */
433 /* validate it will fit in a word */
434 if (l > 0xffff) {
435 DEBUG_PRINTFQ("is out of range\n");
436 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
437 return -1;
438 }
439
440 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
441 *nextword = l & 0xffff;
442 *nextwordused += 1;
443 return 0x1e;
444 } else if (errno) {
445 DEBUG_PRINTFQ("is out of range\n");
446 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
447 }
448
449 /* not a number? try a label */
450 if (label_addr_(labels, operand, nextword)) {
451 DEBUG_PRINTFQ("(deferred label resolution)\n");
452 *nextwordused += 1;
453 return -2;
454 }
455 DEBUG_PRINTFQ("is a dereferenced label\n");
456 *nextwordused += 1;
457 return 0x1e;
458 }
459
460 /* left with a literal or a label, then */
461
462 errno = 0;
463 l = strtoul(operand, &ep, 0);
464 if (errno == 0
465 || (*operand && (*ep == '\0')) ) {
466 if (l > 0xffff) {
467 DEBUG_PRINTFQ("is out of range\n");
468 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
469 return -1;
470 }
471
472 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
473 if (l < 0x20) {
474 return l + 0x20;
475 }
476
477 *nextword = l & 0xffff;
478 *nextwordused += 1;
479 return 0x1f;
480 }
481
482 /* try to populate nextword with label address */
483 if (label_addr_(labels, operand, nextword)) {
484 DEBUG_PRINTFQ("(deferred label resolution)\n");
485 /* assume non-small literal value */
486 *nextwordused += 1;
487 return -2;
488 }
489
490 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
491 if (*nextword < 0x20 && allow_short_labels) {
492 DEBUG_PRINTF("small value label win\n");
493 return (0x20 + *nextword) & 0x3f;
494 }
495
496 *nextwordused += 1;
497 return 0x1f;
498 }
499
500 /* prints an instruction's assembly */
501 static inline
502 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
503 struct operand_ *o;
504 int r;
505
506 if (with_label)
507 r = printf("%-16s %3s", i->label ? i->label : "", i->opcode);
508 else
509 r = printf("%3s", i->opcode);
510
511 for (o = i->operands; o; o = o->next)
512 r += printf(" %s%s", o->operand, o->next ? "," : "");
513
514 return r;
515 }
516
517 /* buf_tokenize_
518 * Parses the zero-terminated line of input 'buf' into a newly-allocated struct instruction_.
519 * [label] opcode [operand[,operand[,...]]]
520 * Does not yet validate if labels, opcodes, or operands are valid...
521 */
522 static
523 int buf_tokenize_(char *buf, struct instruction_ **next_instr) {
524 const char const *sep = " \t\n";
525 const char const *quot = "'`\"";
526 struct instruction_ *instr = NULL;
527 struct operand_ *operand_list = NULL;
528 char *label = NULL,
529 *opcode = NULL,
530 *operand = NULL;
531 char *x,
532 *y,
533 *st, *qt;
534 size_t instr_words_needed = 1;
535
536 assert(buf != NULL);
537 assert(next_instr != NULL);
538
539 *next_instr = NULL;
540
541 /* kill leading whitespace */
542 buf += strspn(buf, " \t\n");
543
544 /* kill trailing whitespace */
545 for (x = buf + strlen(buf); *x && strchr(sep, *x); x--)
546 *x = '\0';
547
548 /* split on first non-quoted ';', ignore following comment */
549 x = strqtok_r(buf, ";", '\\', quot, &qt, &st);
550 if (x == NULL)
551 return 0;
552 if (qt) {
553 fprintf(stderr, "unmatched %c-quote\n", *qt);
554 return -1;
555 }
556
557 /* determine if first token is label, opcode, or we just have a blank line to ignore */
558 x = strqtok_r(x, sep, '\\', quot, &qt, &st);
559 if (x == NULL)
560 return 0;
561 if (qt) {
562 fprintf(stderr, "unmatched %c-quote\n", *qt);
563 return -1;
564 }
565
566 /* I want c-style labels in my asm, but example in spec uses : in prefix rather than postfix */
567 #ifdef NON_SPEC_LABELS
568 /* labels end with :, otherwise its an opcode */
569 y = x + strlen(x) - 1;
570 if (*y == ':') {
571 *y = '\0';
572 label = x;
573 opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
574 } else {
575 label = NULL;
576 opcode = x;
577 }
578 #else /* NON_SPEC_LABELS */
579 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
580 /* otherwise, it's an opcode */
581 if (*x == ':') {
582 label = x + 1;
583 opcode = strqtok_r(NULL, sep, '\\', quot, &qt, &st);
584 } else {
585 label = NULL;
586 opcode = x;
587 }
588 #endif /* NON_SPEC_LABELS */
589
590 if (opcode) {
591 operand = st;
592 }
593
594 /*
595 While normal instructions just have comma-separated operands,
596 DAT can be followed by comma-separated list of:
597 label, to be resolved to address
598 value, like 0xffff
599 string, "quoted", characters to be rendered into low-byte of words
600 */
601
602 if (operand) {
603 struct operand_ **o_next = &operand_list;
604
605 for (x = strqtok_r(operand, ",", '\\', quot, &qt, &st);
606 x;
607 x = strqtok_r(NULL, ",", '\\', quot, &qt, &st) ) {
608
609 /* trim leading whitespace */
610 x += strspn(x, " \t\n");
611 if (*x == '\0') {
612 fprintf(stderr, "encountered empty operand\n");
613 return -1;
614 }
615
616 /* trim trailing whitespace */
617 y = x + strlen(x) - 1;
618 while (strchr(" \t\n", *y)) {
619 *y = '\0';
620 y--;
621 }
622
623 /* new operand to append to list */
624 *o_next = malloc(sizeof **o_next);
625 if (*o_next == NULL) {
626 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
627 return -1;
628 }
629
630 /* assume an operand uses one word, unless it's a string */
631 instr_words_needed += (*x == '"') ? strlen(x) : 1;
632
633 (*o_next)->operand = strdup(x);
634 if ((*o_next)->operand == NULL) {
635 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
636 return -1;
637 }
638 (*o_next)->next = NULL;
639 o_next = &((*o_next)->next);
640 }
641 }
642
643 DEBUG_PRINTF("allocating instr with room for %zu words\n", instr_words_needed);
644
645 /* extra room for assembled words */
646 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
647 if (instr == NULL) {
648 fprintf(stderr, "%s():%s\n", "calloc", strerror(errno));
649 return -1;
650 }
651
652 instr->label = label ? strdup(label) : NULL;
653 instr->opcode = opcode ? strdup(opcode) : NULL;
654 instr->operands = operand_list;
655
656 *next_instr = instr;
657
658 return 0;
659 }
660
661 /* try to generate bytecode for an instruction */
662 static
663 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
664 unsigned int nwu = 0; /* number of words used */
665 unsigned int incomplete = 0;
666 int bits;
667 struct operand_ *o = i->operands;
668
669 if (opt_.verbose > 2) {
670 printf("%s: assembling %p ", __func__, i);
671 instruction_print_(i, 1);
672 printf("\n");
673 }
674
675 if (i->ready) {
676 /* already assembled, nothing to do */
677 return 0;
678 }
679
680 /* special case DAT */
681 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
682 DEBUG_PRINTF("processing DAT...\n");
683
684 i->length = 0;
685
686 while (o) {
687 size_t j, dat_len;
688 char *x;
689 unsigned long l;
690
691 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, o->next);
692
693 /* is this a string? */
694 if ( (x = strchr("\"'`", o->operand[0])) ) {
695 dat_len = strlen(o->operand) - 1;
696 if (o->operand[dat_len] == *x) {
697 /* it is a string */
698 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
699
700 for (j = 0, x = o->operand + 1;
701 j < dat_len - 1;
702 j++, x++) {
703 i->instr_words[i->length] = (DCPU16_WORD)*x;
704 i->length++;
705 }
706 }
707 o = o->next;
708 continue;
709 }
710
711 char *ep;
712 errno = 0;
713 l = strtoul(o->operand, &ep, 0);
714 if (errno == 0
715 && (*o->operand && (*ep == '\0')) ) {
716 /* conversion succeeded */
717 if (l > 0xffff) {
718 fprintf(stderr, "value '%lu' out of range\n", l);
719 return -1;
720 }
721 }
722
723 fprintf(stderr, "FIXME finish implementing DAT\n");
724 /* check if it's a parsable number */
725
726 /* otherwise assume it's a label */
727
728
729
730 o = o->next;
731 }
732
733 return 0;
734 }
735
736 /* start with opcode bits */
737 bits = opcode_bits_(i->opcode);
738 if (bits < 0) {
739 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
740 for (o = i->operands; o; o = o->next)
741 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
742 fprintf(stderr, "'\n");
743 return -1;
744 }
745 i->instr_words[0] |= 0x0f & bits;
746
747 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
748 if ((bits & 0x0f) == 0) {
749 bits = nbi_opcode_bits_(i->opcode);
750 if (bits < 0) {
751 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
752 exit(EX_SOFTWARE);
753 }
754 } else {
755 if (o == NULL) {
756 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
757 return -1;
758 }
759 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
760 if (bits == -1) {
761 fprintf(stderr, "couldn't assemble instruction\n");
762 return -1;
763 } else if (bits == -2) {
764 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
765 /* keep going, but don't finalize until we can calculate label address */
766 incomplete = 1;
767 bits = 0;
768 }
769 o = o->next;
770 }
771 i->instr_words[0] |= (bits & 0x3f) << 4;
772
773 if (o == NULL) {
774 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
775 return -1;
776 }
777
778 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
779 if (bits == -1) {
780 fprintf(stderr, "couldn't assemble instruction\n");
781 return -1;
782 } else if (bits == -2) {
783 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
784 /* keep going, but don't finalize until we can calculate label address */
785 incomplete = 1;
786 bits = 0;
787 }
788 o = o->next;
789 i->instr_words[0] |= (bits & 0x3f) << 10;
790
791 if (o != NULL) {
792 fprintf(stderr, "too many operands\n");
793 return -1;
794 }
795
796 /* counting labels as words, we now know at least the maximum instruction length */
797
798 i->length = nwu + 1;
799
800 DEBUG_PRINTF("instruction words: [%u]", i->length);
801 for (bits = 0; bits <= (int)nwu; bits++)
802 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
803
804 if (incomplete) {
805 DEBUG_PRINTFQ(" (preliminary)");
806 } else {
807 i->ready = 1;
808 }
809
810 DEBUG_PRINTFQ("\n");
811
812 return 0;
813 }
814
815 /* parse_stream_
816 * read lines from stream f
817 * break each line into parts, populate parts into structures
818 */
819 static
820 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
821 struct instruction_ *instr, **instr_list_entry;
822 unsigned int line = 0;
823 int retval = 0;
824 char buf[0x4000];
825
826 buf[sizeof buf - 1] = '\0';
827
828 while (fgets(buf, sizeof buf, f)) {
829 line++;
830
831 if (buf[sizeof buf - 1] != '\0') {
832 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
833 retval = -1;
834 break;
835 }
836
837 if (buf_tokenize_(buf, &instr)) {
838 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
839 retval = -1;
840 break;
841 }
842
843 if (instr) {
844 /* add to list of instructions */
845 instr_list_entry = dynarray_add(instructionps, &instr);
846 if (instr_list_entry == NULL) {
847 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
848 break;
849 }
850
851 if (instr->label) {
852 struct label_ new_label = {
853 .label = instr->label,
854 .instr = instr_list_entry,
855 .ready = 0,
856 .addr = 0,
857 };
858 if (label_find_(labels, instr->label)) {
859 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
860 break;
861 }
862
863 if (dynarray_add(labels, &new_label) == NULL) {
864 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
865 break;
866 }
867 label_addr_calculate_(instructionps, labels);
868 }
869
870 instr_assemble_(labels, instr, allow_short_labels);
871 }
872 }
873 if (ferror(f)) {
874 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
875 return -1;
876 }
877 if (! feof(f)) {
878 fprintf(stderr, "parsing aborted\n");
879 return -1;
880 }
881
882 return retval;
883 }
884
885 /* assemble_check_
886 * make a full pass over instruction list to resolve labels
887 */
888 static
889 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
890 int retval = 0;
891 size_t x;
892
893 /* fixing short labels .... */
894 /* by here we have our list of instructions and their maximum instruction lengths */
895 /* and we have a list of addresses, based on those maximum lengths */
896 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
897 /* and reassemble.. */
898 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
899 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
900
901 /* try this? keep another list of locations a label address is used */
902 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
903
904 DEBUG_PRINTF(" final pass of assembler...\n");
905 for (x = 0; x < instructionps->entries; x++) {
906 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
907 retval |= instr_assemble_(labels, *instrp, allow_short_labels);
908 if (retval) {
909 fprintf(stderr, "instruction %zu failed to assemble\n", x);
910 }
911 }
912
913 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
914 for (x = 0; x < labels->entries; x++) {
915 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
916 if (! l->ready)
917 retval |= -1;
918 if (opt_.verbose) {
919 printf("%3s0x%04x %-32s ",
920 l->ready ? "" : "*",
921 l->addr,
922 l->label);
923 instruction_print_(*(l->instr), 0);
924 printf("\n");
925 }
926 }
927
928 VERBOSE_PRINTF("\n");
929
930 if (retval)
931 fprintf(stderr, "some labels could not be resolved\n");
932
933 return retval;
934 }
935
936 static
937 int output_(struct dynamic_array *instructionps, const char *filename) {
938 FILE *of = NULL;
939 struct instruction_ **instrp;
940 size_t i, r, total_words = 0;
941 size_t x;
942
943 if (! opt_.dryrun) {
944 of = fopen(filename, "w");
945 if (of == NULL) {
946 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
947 return -1;
948 }
949 }
950
951 for (i = 0; i < instructionps->entries; i++) {
952 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
953
954 if (opt_.verbose) {
955 int s;
956 s = instruction_print_(*instrp, 1);
957 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
958 for (x = 0; x < (*instrp)->length; x++) {
959 printf(" %04x", (*instrp)->instr_words[x]);
960 }
961 printf("\n");
962 }
963
964 if (of) {
965 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
966 if (r < (*instrp)->length) {
967 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
968 return -1;
969 }
970 }
971 total_words += (*instrp)->length;
972 }
973
974 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
975 opt_.dryrun ? "assembled" : "wrote",
976 i,
977 total_words);
978
979 return 0;
980 }
981
982 static struct dynamic_array *instructionps_;
983 static struct dynamic_array *labels_;
984
985 int main(int argc, char *argv[]) {
986 const char *out_filename = NULL;
987 unsigned int allow_short_labels = 0;
988 int c;
989
990 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
991 switch (c) {
992 case 'd':
993 opt_.dryrun++;
994 break;
995
996 case 's':
997 allow_short_labels++;
998 break;
999
1000 case 'o':
1001 if (out_filename) {
1002 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1003 exit(EX_CANTCREAT);
1004 }
1005 out_filename = optarg;
1006 break;
1007
1008 case 'v':
1009 opt_.verbose++;
1010 break;
1011
1012 case 'h':
1013 usage_(argv[0], 1);
1014 exit(EX_OK);
1015
1016 default:
1017 usage_(argv[0], 0);
1018 exit(EX_USAGE);
1019 }
1020 }
1021
1022 argc -= optind;
1023 argv += optind;
1024
1025 if (out_filename == NULL)
1026 out_filename = out_filename_default_;
1027
1028 /* init tables */
1029 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1030 labels_ = dynarray_new(sizeof(struct label_), 256);
1031 if (instructionps_ == NULL
1032 || labels_ == NULL) {
1033 fprintf(stderr, "failed to initialize\n");
1034 exit(EX_OSERR);
1035 }
1036
1037 /* if filenames were specified, parse them instead of stdin */
1038 if (argc) {
1039 while (argc) {
1040 char *filename = *argv;
1041 FILE *f = fopen(filename, "r");
1042
1043 argc--, argv++;
1044
1045 if (f == NULL) {
1046 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1047 continue;
1048 }
1049
1050 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1051 parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1052
1053 fclose(f);
1054 }
1055 } else {
1056 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1057 parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1058 }
1059
1060 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1061 fprintf(stderr, "errors prevented assembly\n");
1062 exit(EX_DATAERR);
1063 }
1064
1065 if (output_(instructionps_, out_filename)) {
1066 fprintf(stderr, "failed to create output\n");
1067 exit(EX_OSERR);
1068 }
1069
1070 exit(EX_OK);
1071 }