further changes for v1.7: cpu fixes, support for 'hardware' devices, display to vnc
[dcpu16] / as-dcpu16.c
1 #include <stdlib.h>
2 #include <unistd.h>
3 #include <stdio.h>
4 #include <string.h>
5 #include <strings.h>
6 #include <errno.h>
7 #include <sysexits.h>
8 #include <assert.h>
9
10 #include "dcpu16.h"
11 #include "common.h"
12
13 /*
14 * quick and dirty assembler for dcpu16
15 *
16 * Justin Wind <justin.wind@gmail.com>
17 * 2012 04 07 - implementation started
18 * 2012 04 10 - functional
19 * 2012 04 16 - support dat statements
20 * 2012 05 05 - v1.7 revision started
21 * 2012 05 08 - v1.7 revision implemented
22 *
23 * TODO
24 * needs ability to specify location for code or data
25 * needs ability to specify label as relative to another label
26 * short labels not correctly computed
27 * in label struct, store index of instruction rather than ptr, ptrs for iteration in addr calculation are ugly
28 */
29
/* revision-control keyword placeholder; printed at the bottom of the full usage screen */
static const char * const src_id_ = "$Id$";

/* name of the output file used when no -o option is given */
const char out_filename_default_[] = "a.out";

/* global invocation options */
struct options {
    unsigned int verbose;   /* bumped once per -v; values above 2 enable the DEBUG_* macros */
    unsigned int dryrun;    /* nonzero: results are printed but no output file is opened */
} opt_ = {
    .verbose = 0,
    .dryrun = 0,
};

/* print a "DEBUG: "-prefixed message to stdout when verbosity is above 2 */
#define DEBUG_PRINTF(...) do { if (opt_.verbose > 2) { printf("DEBUG: "); printf(__VA_ARGS__); } } while (0)
/* as DEBUG_PRINTF, but without the prefix (used to continue a line already started) */
#define DEBUG_PRINTFQ(...) do { if (opt_.verbose > 2) printf(__VA_ARGS__); } while (0)
/* print to stdout whenever any verbosity was requested */
#define VERBOSE_PRINTF(...) do { if (opt_.verbose) printf(__VA_ARGS__); } while (0)
46
47 static
48 void usage_(char *prog, unsigned int full) {
49 FILE *f = full ? stdout : stderr;
50 char *x = strrchr(prog, '/');
51
52 if (x && *(x + 1))
53 prog = x + 1;
54
55 if (full)
56 fprintf(f, "%s -- \n\n",
57 prog);
58
59 fprintf(f, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
60 prog);
61
62 if (full) {
63 fprintf(f, "\nOptions:\n"
64 "\t-h -- this screen\n"
65 "\t-o <file> -- output to <file> [default: %s]\n"
66 "\t-s -- allow short labels in instruction words\n"
67 "\t-d -- dry run, print results, do not write to file\n"
68 "\t-v -- verbose output\n",
69 out_filename_default_);
70
71 fprintf(f, "\n%78s\n",
72 src_id_);
73 }
74 }
75
/*
 * Layout of an instruction word, least-significant bit on the right:
 *     aaaaaabbbbbooooo
 * i.e. five opcode bits, then five 'b' operand bits, then six 'a' operand bits.
 */
#define OPCODE_BITS 5
#define OPERAND_B_BITS 5
#define OPERAND_A_BITS 6

/* value with the lowest 'nbits' bits set */
#define N_BIT_MASK(nbits) ((1 << (nbits)) - 1)
81
82
/* one operand of an instruction; an instruction's operands form a singly-linked list */
struct operand_ {
    struct operand_ *next;  /* following operand, NULL on the last one */
    char *operand;          /* tokenized operand text */
};
88
89 /* keep an array of instructions as we read them in */
90 struct instruction_ {
91 size_t src_line;
92 char *label; /* set if a label points here */
93 char *opcode; /* tokenized instruction text */
94 struct operand_ *operands; /* list of operands */
95 unsigned int ready : 1; /* bytecode computed? */
96 unsigned int length; /* number of words of bytecode */
97 DCPU16_WORD instr_words[];
98 };
99
100 /* keep an array of labels, indexed back to their instruction locations */
101 struct label_ {
102 char *label; /* name of label */
103 struct instruction_ **instr; /* pointer into array of instructions */
104 unsigned int ready : 1; /* do we know where this label is yet? */
105 DCPU16_WORD addr;
106 };
107
108
109 /* locate and return the label entry matching name */
110 static
111 struct label_ *label_find_(struct dynamic_array *labels, char *name) {
112 size_t x;
113
114 for (x = 0; x < labels->entries; x++) {
115 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
116 if (strcmp(l->label, name) == 0)
117 return l;
118 }
119 return NULL;
120 }
121
122
123 /* if a label has a validly-calculated address, fetch it */
124 static
125 int label_addr_(struct dynamic_array *labels, char *name, DCPU16_WORD *addr) {
126 struct label_ *l;
127
128 if ( (l = label_find_(labels, name)) == NULL )
129 return -1;
130 if (! l->ready)
131 return -2;
132 *addr = l->addr;
133 return 0;
134 }
135
136
137 /* attempt to determine the addresses of all labels */
138 static
139 void label_addr_calculate_(struct dynamic_array *instructionps, struct dynamic_array *labels) {
140 size_t i;
141
142 /* idea: label1:label2 - calculated as offset between labels */
143
144 /* for each label.. */
145 for (i = 0; i < labels->entries; i++) {
146 struct label_ *l;
147 struct instruction_ **instr;
148 unsigned int word_count = 0;
149
150 l = (struct label_ *)DYNARRAY_ITEM(*labels, i);
151
152 DEBUG_PRINTFQ("%s: calculating address of label '%s'\n", __func__, l->label);
153
154 #if 0
155 force full resolution while debugging
156 /* if it's already calculated, great. */
157 if (l->ready)
158 continue;
159 #endif
160
161 /*
162 * starting at the instruction for this label,
163 * walk backwards through the list of instructions
164 * until we get to the start or a known prior label address.
165 * update our label with the freshly calculated addr
166 */
167
168 /* first fetch the instruction associated with the label we want to know about.. */
169 /* the addr of this instruction will be whatever follows all the preceding instructions */
170 /* so back up one before counting instruction lengths... */
171 instr = ((struct label_ *)DYNARRAY_ITEM(*labels, i))->instr;
172 /* is it the first one? */
173 if (instr == (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0))
174 break;
175
176 instr--;
177
178 while (instr >= (struct instruction_ **)DYNARRAY_ITEM(*instructionps, 0)) {
179 if ((*instr)->ready == 0)
180 DEBUG_PRINTF("%s: instr '%s' not ready\n", __func__, (*instr)->opcode);
181 word_count += (*instr)->length;
182
183 DEBUG_PRINTF("%s: instr '%s' takes '%u' bytes\n", __func__, (*instr)->opcode, (*instr)->length);
184
185 /* have we come across an instruction which a label points to?
186 it should already be calculated, so just add that on and be done */
187 if ((*instr)->label
188 && strcmp((*instr)->label, l->label)) {
189 DCPU16_WORD addr;
190
191 if (label_addr_(labels, (*instr)->label, &addr)) {
192 fprintf(stderr, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
193 (*instr)->label,
194 l->label);
195 continue;
196 }
197
198 word_count += addr;
199 break;
200 }
201 instr--;
202 }
203 l->addr = word_count;
204 l->ready = 1;
205 DEBUG_PRINTF("label '%s' now has addr of 0x%04x\n", l->label, word_count);
206 }
207 }
208
209
/* opcode_bits_
 * Looks up the five opcode bits for an instruction mnemonic (case-insensitive).
 * The non-basic instructions (JSR, INT, ... HWI) all map to opcode 0x00.
 * Returns the opcode value, or -1 after complaining on stderr if the
 * mnemonic is not known.
 */
static
int opcode_bits_(char *opcode) {
    static const struct {
        char op[4];
        char value;
    } table[] = {
        { "JSR", 0x00 },
        { "INT", 0x00 },
        { "IAG", 0x00 },
        { "IAS", 0x00 },
        { "RFI", 0x00 },
        { "IAQ", 0x00 },
        { "HWN", 0x00 },
        { "HWQ", 0x00 },
        { "HWI", 0x00 },
        { "SET", 0x01 },
        { "ADD", 0x02 },
        { "SUB", 0x03 },
        { "MUL", 0x04 },
        { "MLI", 0x05 },
        { "DIV", 0x06 },
        { "DVI", 0x07 },
        { "MOD", 0x08 },
        { "MDI", 0x09 },
        { "AND", 0x0a },
        { "BOR", 0x0b },
        { "XOR", 0x0c },
        { "SHR", 0x0d },
        { "ASR", 0x0e },
        { "SHL", 0x0f },
        { "IFB", 0x10 },
        { "IFC", 0x11 },
        { "IFE", 0x12 },
        { "IFN", 0x13 },
        { "IFG", 0x14 },
        { "IFA", 0x15 },
        { "IFL", 0x16 },
        { "IFU", 0x17 },
        { "ADX", 0x1a },
        { "SBX", 0x1b },
        { "STI", 0x1e },
        { "SDI", 0x1f },
    };
    size_t n;

    for (n = 0; n < sizeof table / sizeof table[0]; n++) {
        if (strcasecmp(table[n].op, opcode) == 0)
            return table[n].value;
    }

    fprintf(stderr, "unknown instruction '%s'\n", opcode);
    return -1;
}
268
/* nbi_opcode_bits_
 * Looks up the bits identifying a non-basic instruction (those whose
 * opcode is 0x00); the caller places them where the 'b' operand would go.
 * The lookup is case-insensitive.
 * Returns the value, or -1 after naming the offending mnemonic on stderr.
 */
static
int nbi_opcode_bits_(char *nbi_opcode) {
    static const struct {
        char op[4];
        char value;
    } nbi_opcodes_bits[] = {
        { " ", 0x00 }, /* reserved for future */
        { "JSR", 0x01 },
        { "INT", 0x08 },
        { "IAG", 0x09 },
        { "IAS", 0x0a },
        { "RFI", 0x0b },
        { "IAQ", 0x0c },
        { "HWN", 0x10 },
        { "HWQ", 0x11 },
        { "HWI", 0x12 },
        { "", 0x00 }
    }, *o;

    /* the empty mnemonic terminates the table */
    for (o = nbi_opcodes_bits; o->op[0]; o++) {
        if (strcasecmp(o->op, nbi_opcode) == 0)
            return o->value;
    }

    /* report what was asked for, not the (empty) table terminator */
    fprintf(stderr, "unknown nbi instruction '%s'\n", nbi_opcode);
    return -1;
}
301
/* register_enumerate_
 * Converts a general-purpose register letter (either case) to its number:
 * A=0 B=1 C=2 X=3 Y=4 Z=5 I=6 J=7.
 * Any other character, including '\0', is reported on stderr and yields
 * (unsigned int)-1.
 */
static inline
unsigned int register_enumerate_(char r) {
    static const char regs[] = "AaBbCcXxYyZzIiJj";
    /* strchr would happily "find" '\0' at the terminator, so rule it out first */
    const char *x = (r != '\0') ? strchr(regs, r) : NULL;

    if (x)
        return (unsigned int)(x - regs) / 2;

    fprintf(stderr, "internal error, unknown register character 0x%02x\n", (unsigned char)r);
    return (unsigned int)-1;
}
314
/* buf_strip_chars_
 * Removes, in place, every occurrence in buf of any character listed in
 * chars.  The remaining characters keep their order and buf stays
 * zero-terminated.
 */
static inline
void buf_strip_chars_(char *buf, char *chars) {
    char *rd = buf;     /* next character to examine */
    char *wr = buf;     /* where the next kept character goes */

    while (*rd != '\0') {
        if (strchr(chars, *rd) == NULL) {
            *wr = *rd;
            wr++;
        }
        rd++;
    }
    *wr = '\0';
}
330
331
332 /* value_bits_
333 * generate the six bits for a given operand string
334 * returns -1 if it could not parse the operand
335 * returns -2 if it could not parse the operand due to an unresolved label
336 * notes: nextword may be overwritten even if it's not used in final instruction
337 *
338 */
339 static
340 int value_bits_(struct dynamic_array *labels, const char *operand_orig, DCPU16_WORD *nextword, unsigned int *nextwordused, unsigned int allow_short_labels) {
341 static char *operand = NULL;
342 static size_t operand_sz = 0;
343
344 unsigned long l;
345 char *o, *ep;
346
347 /*
348 Our operand working buffer shouldn't ever need to be too big,
349 but DAT might blow that assumption.
350 */
351 if (operand_sz <= strlen(operand_orig)) {
352 void *tmp_ptr;
353 size_t new_sz = strlen(operand_orig);
354
355 if (new_sz < 256)
356 new_sz = 256;
357 new_sz += 256;
358
359 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
360 tmp_ptr = realloc(operand, new_sz);
361 if (tmp_ptr == NULL) {
362 fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
363 return -1;
364 }
365 operand = tmp_ptr;
366 operand_sz = new_sz;
367 }
368
369 o = strcpy(operand, operand_orig);
370
371 DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
372
373 /* this is a very stupid parser */
374
375 /* first, let's trim all whitespace out of string at once to make parsing easier */
376 buf_strip_chars_(operand, " \t\n");
377
378 /* single character might match a register */
379 if (strlen(operand) == 1
380 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
381 DEBUG_PRINTFQ("is register %c\n", *operand);
382 return register_enumerate_(*operand);
383 }
384
385 /* easy matches */
386
387 /* push and pop now share the same operand value */
388 if (strcasecmp(operand, "POP") == 0
389 || strcasecmp(operand, "[SP++]") == 0) {
390 DEBUG_PRINTFQ("is POP\n");
391 return 0x18;
392 }
393 if (strcasecmp(operand, "PUSH") == 0
394 || strcasecmp(operand, "[--SP]") == 0) {
395 DEBUG_PRINTFQ("is PUSH\n");
396 return 0x18;
397 }
398
399 if (strcasecmp(operand, "PEEK") == 0
400 || strcasecmp(operand, "[SP]") == 0) {
401 DEBUG_PRINTFQ("is PEEK\n");
402 return 0x19;
403 }
404
405 /* this could be better, if we had a real token tree */
406 if (strncasecmp(operand, "PICK", 4) == 0) {
407 DEBUG_PRINTFQ("is PICK ");
408
409 errno = 0;
410 l = strtoul(operand + 4, &ep, 0);
411 if (errno == 0
412 && (*(operand + 4) && (*ep == '\0')) ) {
413 if (l > 0xffff) {
414 DEBUG_PRINTFQ("(out of range)\n");
415 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
416 return -1;
417 }
418 } else if (errno == ERANGE) {
419 DEBUG_PRINTFQ("(out of range)\n");
420 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
421 return -1;
422 }
423 *nextword = l & 0xffff;
424 *nextwordused += 1;
425 DEBUG_PRINTFQ("0x%04x\n", *nextword);
426 return 0x1a;
427 }
428
429 if (strcasecmp(operand, "SP") == 0) {
430 DEBUG_PRINTFQ("is register SP\n");
431 return 0x1b;
432 }
433 if (strcasecmp(operand, "PC") == 0) {
434 DEBUG_PRINTFQ("is register PC\n");
435 return 0x1c;
436 }
437 if (strcasecmp(operand, "EX") == 0) {
438 DEBUG_PRINTFQ("is register EX\n");
439 return 0x1d;
440 }
441
442 /* is the operand [bracketed]? */
443 if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
444 /* eat the brackets */
445 operand[strlen(operand) - 1] = '\0';
446 operand++;
447
448 /* is it [register]? */
449 if (strlen(operand) == 1
450 && strchr("AaBbCcXxYyZzIiJj", *operand)) {
451 DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
452 return 0x08 | register_enumerate_(*operand);
453 }
454
455 /* is it [register+something]? */
456 if ( (ep = strchr(operand, '+')) ) {
457 char *reg;
458 char *constant;
459
460 DEBUG_PRINTFQ("is multipart.. ");
461
462 /* eat the plus */
463 *ep = '\0';
464 ep++;
465
466 /* figure out which one is which */
467 if ((strlen(ep) == 1 && strchr("AaBbCcXxYyZzIiJj", *ep))
468 || (strlen(ep) == 2 && strcasecmp(ep, "SP")) ) {
469 reg = ep;
470 constant = operand;
471 } else if ((strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand))
472 || (strlen(operand) == 2 && strcasecmp(operand, "SP")) ) {
473 reg = operand;
474 constant = ep;
475 } else {
476 DEBUG_PRINTFQ("is unparsable\n");
477 fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
478 return -1;
479 }
480
481 /* check if something is understandable as a value */
482 errno = 0;
483 l = strtoul(constant, &ep, 0);
484 if (errno == 0
485 && (*constant && (*ep == '\0')) ) {
486 /* string conversion went without issue */
487 /* validate it will fit in a word */
488 if (l > 0xffff) {
489 DEBUG_PRINTFQ("is out of range\n");
490 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
491 return -1;
492 }
493
494 /* seems fine */
495 *nextword = l & 0xffff;
496 *nextwordused += 1;
497
498 /* special case [SP+n]/PICK n */
499 if (strlen(reg) == 2) {
500 DEBUG_PRINTFQ("is PICK 0x%04x\n", *nextword);
501 return 0x1a;
502 }
503
504 DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
505 return 0x10 | register_enumerate_(*reg);
506 } else if (errno == ERANGE) {
507 fprintf(stderr, "%s('%s'):%s\n", "strtoul", constant, strerror(errno));
508 }
509
510 /* what? still here? assume it's a label, I guess */
511 /* try to populate nextword with label address */
512 if (label_addr_(labels, operand, nextword)) {
513 DEBUG_PRINTFQ("(deferred label resolution)\n");
514 *nextwordused += 1;
515 return -2;
516 }
517 DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
518 *nextwordused += 1;
519 return 0x10 | register_enumerate_(*reg);
520 }
521
522 /* it must just be a dereferenced literal then */
523
524 errno = 0;
525 l = strtoul(operand, &ep, 0);
526 if (errno == 0
527 && (*operand && (*ep == '\0')) ) {
528 /* string conversion went without issue */
529 /* validate it will fit in a word */
530 if (l > 0xffff) {
531 DEBUG_PRINTFQ("is out of range\n");
532 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
533 return -1;
534 }
535
536 DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
537 *nextword = l & 0xffff;
538 *nextwordused += 1;
539 return 0x1e;
540 } else if (errno) {
541 DEBUG_PRINTFQ("is out of range\n");
542 fprintf(stderr, "trouble with operand '%s': %s\n", operand_orig, strerror(errno));
543 }
544
545 /* not a number? try a label */
546 if (label_addr_(labels, operand, nextword)) {
547 DEBUG_PRINTFQ("(deferred label resolution)\n");
548 *nextwordused += 1;
549 return -2;
550 }
551 DEBUG_PRINTFQ("is a dereferenced label\n");
552 *nextwordused += 1;
553 return 0x1e;
554 }
555
556 /* left with a literal or a label, then */
557
558 errno = 0;
559 l = strtoul(operand, &ep, 0);
560 if (errno == 0
561 || (*operand && (*ep == '\0')) ) {
562 if (l > 0xffff) {
563 DEBUG_PRINTFQ("is out of range\n");
564 fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
565 return -1;
566 }
567
568 DEBUG_PRINTFQ("is literal value (%lu)\n", l);
569 if (l < 0x1f) {
570 return l + 0x21;
571 }
572 if (l == 0xffff) {
573 return 0x20;
574 }
575
576 *nextword = l & 0xffff;
577 *nextwordused += 1;
578 return 0x1f;
579 }
580
581 /* try to populate nextword with label address */
582 if (label_addr_(labels, operand, nextword)) {
583 DEBUG_PRINTFQ("(deferred label resolution)\n");
584 /* assume non-small literal value */
585 *nextwordused += 1;
586 return -2;
587 }
588
589 DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
590 if (allow_short_labels
591 && (*nextword < 0x1f) ) {
592 DEBUG_PRINTF("small value label win\n");
593 return (0x21 + *nextword) & N_BIT_MASK(OPERAND_A_BITS);
594 }
595 if (allow_short_labels
596 && (*nextword == 0xffff) ) {
597 DEBUG_PRINTF("small value label win\n");
598 return 0x20;
599 }
600
601 *nextwordused += 1;
602 return 0x1f;
603 }
604
605 /* prints an instruction's assembly */
606 static inline
607 int instruction_print_(struct instruction_ *i, unsigned int with_label) {
608 struct operand_ *o;
609 int r;
610
611 if (with_label)
612 r = printf("%-16s ", i->label ? i->label : "");
613
614 r = printf("%3s", i->opcode ? i->opcode : "");
615
616 for (o = i->operands; o; o = o->next)
617 r += printf(" %s%s", o->operand, o->next ? "," : "");
618
619 if (i->ready) {
620 DCPU16_WORD l;
621 printf(" [");
622 l = dcpu16_mnemonify_buf(i->instr_words);
623 printf("]");
624
625 if (i->length != l)
626 DEBUG_PRINTF("!!internal inconsistency!! i->length:%u l:%hu should match\n", i->length, l);
627 }
628 return r;
629 }
630
631 /* tokenize_line_
632 * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
633 * [label] instruction [operand[,operand[,...]]]
634 * Does no validation of contents of any of these tokens, as of yet.
635 * does not clean up after itself if a malloc fails
636 */
637 static
638 int tokenize_line_(char *line, struct instruction_ **next_instr) {
639 const char const *whitespace = " \t\n";
640 const char const *quotes = "\"'`";
641 struct instruction_ *instr = NULL;
642 char *x, *st, *qt;
643 char *label, *opcode;
644 struct operand_ *operand_list = NULL;
645 struct operand_ **operand_tail = &operand_list;
646 size_t instr_words_needed = 0;
647
648 assert(line);
649 assert(next_instr);
650
651 *next_instr = NULL;
652
653 /* strip leading whitespace */
654 line += strspn(line, whitespace);
655 if (*line == '\0')
656 return 0;
657
658 /* set first bare ';' to '\0', thus isolating any comments */
659 /* here we only care about the side-effect of truncating the first separator character */
660 (void)strqtok_r(line, ";", '\\', quotes, &qt, &st);
661 /* we don't care if there was an unmatched quote at this point, let's see what happens */
662 if (*line == '\0')
663 return 0;
664
665 /* carve off the first token, determine if it is a label */
666 x = strqtok_r(line, whitespace, '\\', quotes, &qt, &st);
667 if (x == NULL || *x == '\0')
668 return 0;
669 if (qt) {
670 /* labels could contain an unmatched quote character, I guess? */
671 qt = NULL;
672 }
673
674 /* we have something, try to make sense of what it is */
675
676 #ifdef NON_SPEC_LABELS
677 /* I want my labels like 'label:' */
678 if ( *(x + strlen(line) - 1) == ':' ) {
679 *(x + strlen(line) - 1) = '\0';
680 DEBUG_PRINTF("label: %s\n", x);
681
682 label = x;
683
684 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
685 } else {
686 label = NULL;
687 opcode = x;
688 }
689 #endif /* NON_SPEC_LABELS */
690
691 /* spec gives example of labels as ':label' */
692 if (*x == ':') {
693 *x = '\0';
694 x++;
695 label = x;
696 opcode = strqtok_r(NULL, whitespace, '\\', quotes, &qt, &st);
697 } else {
698 label = NULL;
699 opcode = x;
700 }
701 /* opcodes shouldn't have quotes, so we'll ignore any unmatched quotes again */
702
703 if (opcode && *opcode) {
704 /* if we have an opcode, we'll need at least one word to compile instruction */
705 instr_words_needed++;
706
707 /* build a list of operands to hang off this instruction */
708 while ( (x = strqtok_r(NULL, ",", '\\', quotes, &qt, &st)) ) {
709 struct operand_ *new_operand;
710 char *y;
711
712 /* trim whitespaces */
713 x += strspn(x, whitespace);
714
715 if (*x) {
716 for (y = x + strlen(x) - 1; *y; y--) {
717 if (strchr(whitespace, *y)) {
718 *y = '\0';
719 }
720 }
721 }
722 /* nothing left? */
723 if (*x == '\0') {
724 fprintf(stderr, "null operand encountered\n");
725 return -1;
726 }
727
728 DEBUG_PRINTF("tokenized operand '%s'\n", x);
729
730 new_operand = malloc(sizeof *new_operand);
731 if (new_operand == NULL) {
732 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
733 return -1;
734 }
735
736 new_operand->operand = strdup(x);
737 if (new_operand->operand == NULL) {
738 fprintf(stderr, "%s():%s\n", "strdup", strerror(errno));
739 return -1;
740 }
741
742 new_operand->next = NULL;
743
744 if (strchr(quotes, x[0])) {
745 /* if this is a quoted operand, assuming we are in a DAT statement, it will take up slightly less room than it is long */
746 instr_words_needed += strlen(x) - 1;
747 }
748 instr_words_needed++;
749
750 *operand_tail = new_operand;
751 operand_tail = &(*operand_tail)->next;
752 }
753 }
754
755 DEBUG_PRINTF("allocating new instruction with room for %zu bytes\n", instr_words_needed);
756
757 instr = calloc(1, (instr_words_needed * sizeof *instr->instr_words) + sizeof *instr);
758 if (instr == NULL) {
759 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
760 return -1;
761 }
762
763 if (label) {
764 instr->label = strdup(label);
765 if (instr->label == NULL) {
766 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
767 return -1;
768 }
769 } else {
770 label = NULL;
771 }
772
773 if (opcode) {
774 instr->opcode = strdup(opcode);
775 if (instr->opcode == NULL) {
776 fprintf(stderr, "%s():%s\n", "malloc", strerror(errno));
777 return -1;
778 }
779 } else {
780 opcode = NULL;
781 }
782
783 instr->operands = operand_list;
784
785 *next_instr = instr;
786
787 return 0;
788 }
789
790 /* try to generate bytecode for an instruction */
791 /* returns -1 on unrecoverable error */
792 static
793 int instr_assemble_(struct dynamic_array *labels, struct instruction_ *i, unsigned int allow_short_labels) {
794 unsigned int nwu = 0; /* number of words used */
795 unsigned int incomplete = 0;
796 int bits;
797 struct operand_ *o = i->operands;
798
799 if (opt_.verbose > 2) {
800 printf("%s: assembling %p ", __func__, (void *)i);
801 instruction_print_(i, 1);
802 printf("(line %zu)\n", i->src_line);
803 }
804
805 if (i->opcode == NULL) {
806 assert(i->label);
807 assert(i->operands == NULL);
808 /* just a label, move along */
809 i->length = 0;
810 i->ready = 1;
811 return 0;
812 }
813
814 /* special case DAT */
815 if (strncasecmp(i->opcode, "DAT", 3) == 0) {
816 DEBUG_PRINTF("processing DAT...\n");
817
818 i->length = 0;
819
820 for ( /* */ ; o; o = o->next) {
821 size_t j, dat_len;
822 char *x;
823 unsigned long l;
824
825 DEBUG_PRINTF("DAT operand:'%s' next:%p\n", o->operand, (void *)o->next);
826
827 /* is this a string? */
828 /* does it start with a quote, and end with the same quote? */
829 if ( (x = strchr("\"'`", o->operand[0])) ) {
830 dat_len = strlen(o->operand) - 1;
831 if (o->operand[dat_len] == *x) {
832 /* it is a string */
833 DEBUG_PRINTF("DAT string operand: %s\n", o->operand);
834
835 for (j = 0, x = o->operand + 1;
836 j < dat_len - 1;
837 j++, x++) {
838 i->instr_words[i->length] = *x;
839 i->length++;
840 }
841 /* Note that strings in DAT do not include their zero-terminators */
842 /* specify as 'DAT "string", 0' */
843 }
844 continue;
845 }
846
847 /* is this a number? */
848 char *ep;
849 errno = 0;
850 l = strtoul(o->operand, &ep, 0);
851 if (errno == 0
852 && (*o->operand && (*ep == '\0')) ) {
853 /* conversion succeeded */
854 if (l > 0xffff) {
855 fprintf(stderr, "value '%lu' out of range\n", l);
856 return -1;
857 }
858 i->instr_words[i->length] = l;
859 i->length++;
860 continue;
861 }
862
863 /* otherwise assume it's a label, even if we don't know what it is */
864 if (label_addr_(labels, o->operand, &i->instr_words[i->length])) {
865 DEBUG_PRINTF("(deferred label '%s' resolution)\n", o->operand);
866 incomplete = 1;
867 }
868 i->length++;
869 }
870
871 if (incomplete) {
872 DEBUG_PRINTF("pending label address\n");
873 } else {
874 i->ready = 1;
875 }
876
877 return 0;
878 } /* end of DAT */
879
880 /* start with opcode bits */
881 bits = opcode_bits_(i->opcode);
882 if (bits < 0) {
883 fprintf(stderr, "unrecognized instruction '%s%s", i->opcode, i->operands ? " " : "");
884 for (o = i->operands; o; o = o->next)
885 fprintf(stderr, " %s%s", o->operand, o->next ? "," : "");
886 fprintf(stderr, "'\n");
887 return -1;
888 }
889 i->instr_words[0] |= bits & N_BIT_MASK(OPCODE_BITS);
890
891 /* in rendered bytecode, all instructions have a and b operands; nbi instructions occupy 'b operand' bits. */
892 if ((bits & N_BIT_MASK(OPCODE_BITS)) == 0) {
893 bits = nbi_opcode_bits_(i->opcode);
894 if (bits < 0) {
895 fprintf(stderr, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
896 exit(EX_SOFTWARE);
897 }
898 } else {
899 if (o == NULL) {
900 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
901 return -1;
902 }
903 bits = value_bits_(labels, o->operand, i->instr_words + 1, &nwu, allow_short_labels);
904 if (bits == -1) {
905 fprintf(stderr, "couldn't assemble instruction\n");
906 return -1;
907 } else if (bits == -2) {
908 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
909 /* keep going, but don't finalize until we can calculate label address */
910 incomplete = 1;
911 bits = 0;
912 }
913 o = o->next;
914 }
915 if (bits > N_BIT_MASK(OPERAND_B_BITS)) {
916 fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "b");
917 return -1;
918 }
919 i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_B_BITS)) << OPCODE_BITS;
920
921 if (o == NULL) {
922 fprintf(stderr, "'%s' requires more operands\n", i->opcode);
923 return -1;
924 }
925
926 bits = value_bits_(labels, o->operand, i->instr_words + 1 + nwu, &nwu, allow_short_labels);
927 if (bits == -1) {
928 fprintf(stderr, "couldn't assemble instruction\n");
929 return -1;
930 } else if (bits == -2) {
931 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__);
932 /* keep going, but don't finalize until we can calculate label address */
933 incomplete = 1;
934 bits = 0;
935 }
936 o = o->next;
937 if (bits > N_BIT_MASK(OPERAND_A_BITS)) {
938 fprintf(stderr, "%s: internal error: operand '%s' generated out of range\n", __func__, "a");
939 }
940 i->instr_words[0] |= (bits & N_BIT_MASK(OPERAND_A_BITS)) << (OPCODE_BITS + OPERAND_B_BITS);
941
942 if (o != NULL) {
943 fprintf(stderr, "too many operands\n");
944 return -1;
945 }
946
947 /* counting labels as words, we now know at least the maximum instruction length */
948
949 i->length = nwu + 1;
950
951 DEBUG_PRINTF("instruction words: [%u]", i->length);
952 for (bits = 0; bits <= (int)nwu; bits++)
953 DEBUG_PRINTFQ(" %04x", i->instr_words[bits]);
954
955 if (incomplete) {
956 DEBUG_PRINTFQ(" (preliminary)");
957 } else {
958 i->ready = 1;
959 }
960
961 DEBUG_PRINTFQ("\n");
962
963 return 0;
964 }
965
966 /* parse_stream_
967 * read lines from stream f
968 * break each line into parts, populate parts into structures
969 */
970 static
971 int parse_stream_(FILE *f, const char *src, struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
972 struct instruction_ *instr, **instr_list_entry;
973 unsigned int line = 0;
974 int retval = 0;
975 char buf[0x4000];
976
977 buf[sizeof buf - 1] = '\0';
978
979 while (fgets(buf, sizeof buf, f)) {
980 line++;
981
982 if (buf[sizeof buf - 1] != '\0') {
983 fprintf(stderr, "%s:%u:%s", src, line, "input line too long\n");
984 retval = -1;
985 break;
986 }
987
988 if (tokenize_line_(buf, &instr)) {
989 fprintf(stderr, "%s:%u:%s", src, line, "trouble tokenizing input\n");
990 retval = -1;
991 break;
992 }
993
994 if (instr) {
995 instr->src_line = line;
996 /* add to list of instructions */
997 instr_list_entry = dynarray_add(instructionps, &instr);
998 if (instr_list_entry == NULL) {
999 fprintf(stderr, "%s:%u:%s", src, line, "could not populate instruction list\n");
1000 break;
1001 }
1002
1003 if (instr->label) {
1004 struct label_ new_label = {
1005 .label = instr->label,
1006 .instr = instr_list_entry,
1007 .ready = 0,
1008 .addr = 0,
1009 };
1010 if (label_find_(labels, instr->label)) {
1011 fprintf(stderr, "%s:%u:%s", src, line, "duplicate label\n");
1012 break;
1013 }
1014
1015 if (dynarray_add(labels, &new_label) == NULL) {
1016 fprintf(stderr, "%s:%u:%s", src, line, "could not populate label list\n");
1017 break;
1018 }
1019 label_addr_calculate_(instructionps, labels);
1020 }
1021
1022 if (instr_assemble_(labels, instr, allow_short_labels)) {
1023 fprintf(stderr, "%s:%u:%s", src, line, "could not assemble instruction\n");
1024 break;
1025 }
1026 }
1027 }
1028 if (ferror(f)) {
1029 fprintf(stderr, "%s():%s\n", "fgets", strerror(errno));
1030 return -1;
1031 }
1032 if (! feof(f)) {
1033 fprintf(stderr, "parsing aborted\n");
1034 return -1;
1035 }
1036
1037 return retval;
1038 }
1039
1040 /* assemble_check_
1041 * make a full pass over instruction list to resolve labels
1042 */
1043 static
1044 int assemble_check_(struct dynamic_array *instructionps, struct dynamic_array *labels, unsigned int allow_short_labels) {
1045 int retval = 0;
1046 size_t x;
1047
1048 /* fixing short labels .... */
1049 /* by here we have our list of instructions and their maximum instruction lengths */
1050 /* and we have a list of addresses, based on those maximum lengths */
1051 /* So, if doing short labels, all label addresses are now suspect, so recompute them all... */
1052 /* and reassemble.. */
1053 /* uh.. what else am I forgetting.. this method won't work for labels approaching the limit */
1054 /* of short form addresses, when there are more than the difference number of short form labels used previous to those addresses */
1055
1056 /* try this? keep another list of locations a label address is used */
1057 /* as we step forward, and recompute an address, back up to first occurence of address, make sure nothing else has changed */
1058
1059 DEBUG_PRINTF(" final pass of assembler...\n");
1060 for (x = 0; x < instructionps->entries; x++) {
1061 struct instruction_ **instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, x);
1062 retval = instr_assemble_(labels, *instrp, allow_short_labels);
1063 if (retval) {
1064 fprintf(stderr, "instruction %zu failed to assemble\n", x);
1065 return retval;
1066 }
1067 if (! (*instrp)->ready) {
1068 fprintf(stderr, "instruction not resolvable\n");
1069 return -1;
1070 }
1071 }
1072
1073 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
1074 for (x = 0; x < labels->entries; x++) {
1075 struct label_ *l = (struct label_ *)DYNARRAY_ITEM(*labels, x);
1076 if (! l->ready)
1077 retval |= -1;
1078 if (opt_.verbose) {
1079 printf("%3s0x%04x %-32s ",
1080 l->ready ? "" : "*",
1081 l->addr,
1082 l->label);
1083 instruction_print_(*(l->instr), 0);
1084 printf("\n");
1085 }
1086 }
1087
1088 VERBOSE_PRINTF("\n");
1089
1090 if (retval)
1091 fprintf(stderr, "some labels could not be resolved\n");
1092
1093 return retval;
1094 }
1095
1096 /* output_
1097 * write assembled words to named file
1098 */
1099 static
1100 int output_(struct dynamic_array *instructionps, const char *filename) {
1101 FILE *of = NULL;
1102 struct instruction_ **instrp;
1103 size_t i, r, total_words = 0;
1104 size_t x;
1105
1106 if (! opt_.dryrun) {
1107 of = fopen(filename, "w");
1108 if (of == NULL) {
1109 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1110 return -1;
1111 }
1112 }
1113
1114 for (i = 0; i < instructionps->entries; i++) {
1115 instrp = (struct instruction_ **)DYNARRAY_ITEM(*instructionps, i);
1116
1117 if (opt_.verbose) {
1118 int s;
1119 s = instruction_print_(*instrp, 1);
1120 printf("%*s;", (44 - s) > 0 ? (44 - s) : 0, "");
1121 for (x = 0; x < (*instrp)->length; x++) {
1122 printf(" %04x", (*instrp)->instr_words[x]);
1123 }
1124 printf("\n");
1125 }
1126
1127 if (of) {
1128 r = fwrite((*instrp)->instr_words, sizeof(DCPU16_WORD), (*instrp)->length, of);
1129 if (r < (*instrp)->length) {
1130 fprintf(stderr, "%s():%s\n", "fwrite", strerror(errno));
1131 return -1;
1132 }
1133 }
1134 total_words += (*instrp)->length;
1135 }
1136
1137 fprintf(stderr, "%s 0x%04zx instructions as 0x%04zx words\n",
1138 opt_.dryrun ? "assembled" : "wrote",
1139 i,
1140 total_words);
1141
1142 return 0;
1143 }
1144
1145 static struct dynamic_array *instructionps_;
1146 static struct dynamic_array *labels_;
1147
1148 int main(int argc, char *argv[]) {
1149 const char *out_filename = NULL;
1150 unsigned int allow_short_labels = 0;
1151 int c;
1152
1153 while ( (c = getopt(argc, argv, "dhsvo:")) != EOF ) {
1154 switch (c) {
1155 case 'd':
1156 opt_.dryrun++;
1157 break;
1158
1159 case 's':
1160 allow_short_labels++;
1161 break;
1162
1163 case 'o':
1164 if (out_filename) {
1165 fprintf(stderr, "Sorry, I can only write one file at a time.\n");
1166 exit(EX_CANTCREAT);
1167 }
1168 out_filename = optarg;
1169 break;
1170
1171 case 'v':
1172 opt_.verbose++;
1173 break;
1174
1175 case 'h':
1176 usage_(argv[0], 1);
1177 exit(EX_OK);
1178
1179 default:
1180 usage_(argv[0], 0);
1181 exit(EX_USAGE);
1182 }
1183 }
1184
1185 argc -= optind;
1186 argv += optind;
1187
1188 if (out_filename == NULL)
1189 out_filename = out_filename_default_;
1190
1191 /* init tables */
1192 instructionps_ = dynarray_new(sizeof (struct instruction_ *), 1024);
1193 labels_ = dynarray_new(sizeof(struct label_), 256);
1194 if (instructionps_ == NULL
1195 || labels_ == NULL) {
1196 fprintf(stderr, "failed to initialize\n");
1197 exit(EX_OSERR);
1198 }
1199
1200 /* if filenames were specified, parse them instead of stdin */
1201 if (argc) {
1202 while (argc) {
1203 char *filename = *argv;
1204 FILE *f = fopen(filename, "r");
1205
1206 argc--, argv++;
1207
1208 if (f == NULL) {
1209 fprintf(stderr, "%s('%s'):%s\n", "fopen", filename, strerror(errno));
1210 continue;
1211 }
1212
1213 VERBOSE_PRINTF("assembling '%s'...\n", filename);
1214 c = parse_stream_(f, filename, instructionps_, labels_, allow_short_labels);
1215 fclose(f);
1216 if (c)
1217 break;
1218 }
1219 } else {
1220 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
1221 c = parse_stream_(stdin, "-", instructionps_, labels_, allow_short_labels);
1222 }
1223 if (c) {
1224 fprintf(stderr, "could not parse input, aborting\n");
1225 exit(EX_DATAERR);
1226 }
1227
1228 if (assemble_check_(instructionps_, labels_, allow_short_labels)) {
1229 fprintf(stderr, "errors prevented assembly\n");
1230 exit(EX_DATAERR);
1231 }
1232
1233 if (output_(instructionps_, out_filename)) {
1234 fprintf(stderr, "failed to create output\n");
1235 exit(EX_OSERR);
1236 }
1237
1238 exit(EX_OK);
1239 }