798bf7ece0d8061a01c50e66b258cc64bd710be2
12 * quick and dirty assembler for dcpu16
14 * Justin Wind <justin.wind@gmail.com>
15 * 2012 04 07 - implementation started
16 * 2012 04 10 - functional
19 * needs ability to specify location for code or data
20 * short labels not correctly computed
23 static const char * const src_id_
= "$Id$";
25 const char const out_filename_default_
[] = "a.out";
26 unsigned int verbose_
= 0;
27 unsigned int dryrun_
= 0;
30 #define DEBUG_PRINTF(...) do { if (verbose_ > 2) printf(__VA_ARGS__); } while (0)
31 #define VERBOSE_PRINTF(...) do { if (verbose_) printf(__VA_ARGS__); } while (0)
35 void usage_(char *prog
, unsigned int full
) {
36 FILE *f
= full
? stdout
: stderr
;
37 char *x
= strrchr(prog
, '/');
43 fprintf(f
, "%s -- \n\n",
46 fprintf(f
, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
50 fprintf(f
, "\nOptions:\n"
51 "\t-h -- this screen\n"
52 "\t-o <file> -- output to <file> [default: %s]\n"
53 "\t-s -- allow short labels in instruction words\n"
54 "\t-d -- dry run, print results, do not write to file\n"
55 "\t-v -- verbose output\n",
56 out_filename_default_
);
58 fprintf(f
, "\n%78s\n",
64 /* maintain an array of the instructions we have parsed */
66 struct instruction_list_
*instr_list_new(void) {
67 size_t init_size
= 1024;
68 struct instruction_list_
*il
= malloc(IL_SIZE(init_size
));
70 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
73 il
->allocated
= init_size
;
/*
 * Store instruction pointer i at index entries of the list *il, first
 * growing the list by 1024 slots with realloc() when the capacity check
 * fires.  A realloc() failure is reported on stderr.
 * NOTE(review): this excerpt omits some original lines (the embedded
 * numbering skips 86-88 and 90-91, and everything after 92), so the error
 * return, the reassignment of *il and the entries increment are not
 * visible here.
 */
79 int instr_list_insert(struct instruction_list_
**il
, struct instruction_
*i
) {
80 /* make room make room */
/*
 * NOTE(review): growth fires when entries - 1 == allocated, i.e. only once
 * entries has already passed allocated, so instr[allocated] was written
 * before the list grew.  Whether that slot exists depends on IL_SIZE,
 * which is defined elsewhere -- confirm this is not an off-by-one.
 */
81 if ((*il
)->entries
- 1 == (*il
)->allocated
) {
82 size_t new_allocated
= (*il
)->allocated
+ 1024;
/* result goes to a temporary so *il is untouched if realloc() fails */
83 void *tmp_ptr
= realloc(*il
, IL_SIZE(new_allocated
));
84 if (tmp_ptr
== NULL
) {
85 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
89 (*il
)->allocated
= new_allocated
;
/* record the instruction in the next free slot */
92 (*il
)->instr
[(*il
)->entries
] = i
;
97 /* also maintain a list of the labels we've seen, indexed back to their instructions. */
98 /* FIXME: ugh, this could all stand to be rewritten cleaner */
99 /* these lists could be rearranged to be a lot easier to wrangle and/or maybe use common interfaces */
100 /* they were thrown together on the fly */
102 struct label_list_
*label_list_new(void) {
103 size_t init_size
= 256;
104 struct label_list_
*ll
= malloc(LL_SIZE(init_size
));
106 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
109 ll
->allocated
= init_size
;
114 /* instr here is a pointer to the entry within the instruction list */
/*
 * Record the label of the instruction *instr in the label list *ll,
 * growing the list by 256 slots with realloc() when the capacity check
 * fires.  The new entry keeps the instruction's label pointer (no copy is
 * made) and the instr pointer itself.
 * NOTE(review): this excerpt omits some original lines (the embedded
 * numbering skips 122-124, 126-127, 129 and everything after 131), so the
 * error return, the reassignment of *ll and the entries increment are not
 * visible here.
 */
116 int label_list_insert(struct label_list_
**ll
, struct instruction_
**instr
) {
/*
 * NOTE(review): growth fires when entries - 1 == allocated, i.e. only once
 * entries has already passed allocated; whether label[allocated] is a
 * valid slot depends on the sizing macro -- confirm.
 */
117 if ((*ll
)->entries
- 1 == (*ll
)->allocated
) {
118 size_t new_allocated
= (*ll
)->allocated
+ 256;
/*
 * NOTE(review): label_list_new() sizes this list with LL_SIZE, but the
 * realloc below uses IL_SIZE.  This looks like a copy/paste slip; confirm
 * the two macros yield the same size or switch to LL_SIZE.
 */
119 void *tmp_ptr
= realloc(*ll
, IL_SIZE(new_allocated
));
120 if (tmp_ptr
== NULL
) {
121 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
125 (*ll
)->allocated
= new_allocated
;
128 DEBUG_PRINTF("TRACE: adding label '%s'\n", (*instr
)->label
);
/* the label string is shared with the instruction, not duplicated */
130 (*ll
)->label
[(*ll
)->entries
].label
= (*instr
)->label
;
/*
 * NOTE(review): parse_stream_() passes a pointer into the instruction
 * list's instr array here; if instr_list_insert() later moves that list
 * with realloc(), this stored pointer would presumably go stale -- verify.
 */
131 (*ll
)->label
[(*ll
)->entries
].instr
= instr
;
136 /* locate the index of a labelled instruction within the instruction list */
138 struct instruction_
**label_list_find_instr(struct label_list_
*ll
, char *label
) {
141 for (x
= 0; x
< ll
->entries
; x
++) {
142 if (strcmp(ll
->label
[x
].label
, label
) == 0)
143 return ll
->label
[x
].instr
;
148 /* look up the calculated address of a label */
150 int label_list_find_addr(struct label_list_
*ll
, char *label
, DCPU16_WORD
*addr
) {
153 for (x
= 0; x
< ll
->entries
; x
++) {
154 if (strcmp(ll
->label
[x
].label
, label
) == 0) {
155 if (ll
->label
[x
].ready
== 1) {
156 *addr
= ll
->label
[x
].addr
;
164 /* attempt to determine the addresses of labels */
166 void label_addr_calculate_(struct instruction_list_
*il
, struct label_list_
*ll
) {
169 /* walk through labels */
170 for (i
= 0; i
< ll
->entries
; i
++) {
171 struct instruction_
**instr
;
172 unsigned int word_count
= 0;
174 if (ll
->label
[i
].ready
)
178 * walk backwards through the list of instructions
179 * until we get to the start or a known prior label address
182 for (instr
= ll
->label
[i
].instr
; instr
>= il
->instr
; instr
--) {
184 word_count
+= (*instr
)->length
;
187 && strcmp((*instr
)->label
, ll
->label
[i
].label
)) {
190 if (label_list_find_addr(ll
, (*instr
)->label
, &addr
)) {
191 fprintf(stderr
, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
200 ll
->label
[i
].addr
= word_count
;
201 ll
->label
[i
].ready
= 1;
202 DEBUG_PRINTF("label '%s' has addr of 0x%04x\n", ll
->label
[i
].label
, word_count
);
207 void instr_free_(struct instruction_
*i
) {
212 while (i
->operands
) {
213 struct operand_
*o
= i
->operands
;
215 i
->operands
= o
->next
;
222 /* generate the nibble for a given basic opcode */
224 int opcode_bits_(char *opcode
) {
228 } opcodes_lower_nibble
[] = {
230 /* { "future nbi instruction", 0x00 }, */
249 for (o
= opcodes_lower_nibble
; o
->op
[0]; o
++) {
250 if (strcasecmp(o
->op
, opcode
) == 0)
254 if (o
->op
[0] == '\0') {
255 fprintf(stderr
, "unknown instruction '%s'\n", opcode
);
262 /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */
/*
 * Search the nbi_opcodes_bits table for nbi_opcode, comparing names
 * case-insensitively with strcasecmp(); the table walk stops at an entry
 * whose op string is empty.  An unmatched name is reported on stderr.
 * NOTE(review): this excerpt omits some original lines (the embedded
 * numbering skips 265-267, 270-273, 276-278 and everything after 280), so
 * the table's element type, its remaining entries and the return
 * statements are not visible here.
 */
264 int nbi_opcode_bits_(char *nbi_opcode
) {
268 } nbi_opcodes_bits
[] = {
269 { " ", 0x00 }, /* reserved for future */
/* walk the table until the empty-string terminator entry */
274 for (o
= nbi_opcodes_bits
; o
->op
[0]; o
++) {
275 if (strcasecmp(o
->op
, nbi_opcode
) == 0)
/* reaching the terminator means no table entry matched */
279 if (o
->op
[0] == '\0') {
/*
 * NOTE(review): o->op is the empty terminator string in this branch, so
 * the message prints an empty name.  opcode_bits_() prints its opcode
 * argument in the equivalent message; nbi_opcode was probably intended.
 */
280 fprintf(stderr
, "unknown nbi instruction '%s'\n", o
->op
);
287 /* convert register character like 'x' to value like 0x03 */
289 unsigned int register_enumerate_(char r
) {
290 const char regs
[] = "AaBbCcXxYyZzIiJj";
291 const char *x
= strchr(regs
, r
);
296 fprintf(stderr
, "internal error, unknown register character 0x%02x\n", r
);
300 /* removes all occurrences of chars from buf */
302 void buf_strip_chars_(char *buf
, char *chars
) {
305 for (s
= d
= buf
; *s
; s
++, d
++) {
306 while (*s
&& strchr(chars
, *s
)) {
317 * generate the six bits for a given operand string
318 * returns -1 if it could not parse the operand
319 * returns -2 if it could not parse the operand due to an unresolved label
320 * notes: nextword may be overwritten even if it's not used in final instruction
323 int value_bits_(struct label_list_
*ll
, char *operand_orig
, DCPU16_WORD
*nextword
, unsigned int *nextwordused
, unsigned int allow_short_labels
) {
324 static char *operand
= NULL
;
325 static size_t operand_sz
= 0;
331 Our operand working buffer shouldn't ever need to be too big,
332 but DAT might blow that assumption.
334 if (operand_sz
<= strlen(operand_orig
)) {
336 size_t new_sz
= strlen(operand_orig
);
342 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__
, new_sz
);
343 tmp_ptr
= realloc(operand
, new_sz
);
344 if (tmp_ptr
== NULL
) {
345 fprintf(stderr
, "%s(%zu):%s\n", "realloc", new_sz
, strerror(errno
));
352 o
= strcpy(operand
, operand_orig
);
354 DEBUG_PRINTF("%s: operand '%s' ", __func__
, operand
); /* completed later */
356 /* this is a very stupid parser */
358 /* first, let's trim all whitespace out of string at once to make parsing easier */
359 buf_strip_chars_(operand
, " \t\n");
361 /* single character might match a register */
362 if (strlen(operand
) == 1
363 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
364 DEBUG_PRINTF("is register %c\n", *operand
);
365 return register_enumerate_(*operand
);
369 if (strcasecmp(operand
, "POP") == 0) {
370 DEBUG_PRINTF("is POP\n");
373 if (strcasecmp(operand
, "PUSH") == 0) {
374 DEBUG_PRINTF("is PUSH\n");
377 if (strcasecmp(operand
, "PEEK") == 0) {
378 DEBUG_PRINTF("is PEEK\n");
381 if (strcasecmp(operand
, "SP") == 0) {
382 DEBUG_PRINTF("is register SP\n");
385 if (strcasecmp(operand
, "PC") == 0) {
386 DEBUG_PRINTF("is register PC\n");
389 if (strcasecmp(operand
, "O") == 0) {
390 DEBUG_PRINTF("is register O\n");
394 /* is the operand [bracketed]? */
395 if (operand
[0] == '[' && operand
[strlen(operand
) - 1] == ']') {
396 /* eat the brackets */
397 operand
[strlen(operand
) - 1] = '\0';
400 /* is it [register]? */
401 if (strlen(operand
) == 1
402 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
403 DEBUG_PRINTF("is dereferenced register %c\n", *operand
);
404 return 0x08 | register_enumerate_(*operand
);
407 /* is it [register+something]? */
408 if ( (ep
= strchr(operand
, '+')) ) {
416 /* figure out which one is which */
418 && strchr("AaBbCcXxYyZzIiJj", *ep
)) {
421 } else if (strlen(operand
) == 1
422 && strchr("AaBbCcXxYyZzIiJj", *operand
) ) {
426 DEBUG_PRINTF("is unparsable\n");
427 fprintf(stderr
, "couldn't parse operand '%s'\n", operand_orig
);
431 /* check if something is understandable as a value */
433 l
= strtoul(constant
, &ep
, 0);
435 && (*constant
&& (*ep
== '\0')) ) {
436 /* string conversion went without issue */
437 /* validate it will fit in a word */
439 DEBUG_PRINTF("is out of range\n");
440 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
445 *nextword
= l
& 0xffff;
447 DEBUG_PRINTF("is a dereferenced register (%c) + constant (%hu)\n", *reg
, *nextword
);
448 return 0x10 | register_enumerate_(*reg
);
450 DEBUG_PRINTF("is out of range\n");
451 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
455 /* what? still here? assume it's a label, I guess */
456 /* try to populate nextword with label address */
457 if (label_list_find_addr(ll
, operand
, nextword
)) {
458 DEBUG_PRINTF("(deferred label resolution)\n");
462 DEBUG_PRINTF("is a dereferenced register (%c) + label\n", *reg
);
464 return 0x10 | register_enumerate_(*reg
);
467 /* it must just be a dereferenced literal then */
470 l
= strtoul(operand
, &ep
, 0);
472 && (*operand
&& (*ep
== '\0')) ) {
473 /* string conversion went without issue */
474 /* validate it will fit in a word */
476 DEBUG_PRINTF("is out of range\n");
477 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
481 DEBUG_PRINTF("is a dereferenced literal value (%hu)\n", *nextword
);
482 *nextword
= l
& 0xffff;
486 DEBUG_PRINTF("is out of range\n");
487 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
490 /* not a number? try a label */
491 if (label_list_find_addr(ll
, operand
, nextword
)) {
492 DEBUG_PRINTF("(deferred label resolution)\n");
496 DEBUG_PRINTF("is a dereferenced label\n");
501 /* left with a literal or a label, then */
504 l
= strtoul(operand
, &ep
, 0);
506 || (*operand
&& (*ep
== '\0')) ) {
508 DEBUG_PRINTF("is out of range\n");
509 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
513 DEBUG_PRINTF("is literal value (%lu)\n", l
);
518 *nextword
= l
& 0xffff;
523 /* try to populate nextword with label address */
524 if (label_list_find_addr(ll
, operand
, nextword
)) {
525 DEBUG_PRINTF("(deferred label resolution)\n");
526 /* assume non-small literal value */
531 DEBUG_PRINTF("is label '%s' (0x%02hx)\n", operand
, *nextword
);
532 if (*nextword
< 0x20 && allow_short_labels
) {
533 DEBUG_PRINTF("small value label win\n");
534 return (0x20 + *nextword
) & 0x3f;
542 int instruction_print_(struct instruction_
*i
, unsigned int with_label
) {
547 r
= printf("%-16s %3s", i
->label
? i
->label
: "", i
->opcode
);
549 r
= printf("%3s", i
->opcode
);
551 for (o
= i
->operands
; o
; o
= o
->next
)
552 r
+= printf(" %s%s", o
->operand
, o
->next
? "," : "");
557 /* parse an instruction out of buf, create new instruction struct if seemingly valid */
558 /* does not actually check if instruction is valid yet */
559 /* buf must be 0-terminated */
561 int buf_tokenize_(char *buf
, struct instruction_
**next_instr
) {
562 const char const *sep
= " \t\n";
563 struct instruction_
*instr
= NULL
;
573 assert(next_instr
!= NULL
);
578 if ((x
= strchr(buf
, ';')) != NULL
)
580 /* kill leading whitespace */
581 buf
+= strspn(buf
, " \t\n");
582 /* kill trailing whitespace */
584 x
= buf
+ strlen(buf
);
585 while (strchr(" \t\n", *x
)) {
591 if ((x
= strrchr(buf
, '\n')) != NULL
)
594 /* determine if first token is label, opcode, or we just have a blank line to ignore */
595 x
= strtok_r(buf
, sep
, &st
);
597 /* empty line? nothing to do here. */
602 /* labels end with :, otherwise it's an opcode */
603 y
= x
+ strlen(x
) - 1;
607 opcode
= strtok_r(NULL
, sep
, &st
);
609 #else /* OTHER_LABELS */
610 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
611 /* otherwise, it's an opcode */
614 opcode
= strtok_r(NULL
, sep
, &st
);
619 #endif /* OTHER_LABELS */
625 /* extra room for assembled words */
626 instr
= calloc(1, 3 + sizeof *instr
);
628 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
632 instr
->label
= label
? strdup(label
) : NULL
;
633 instr
->opcode
= opcode
? strdup(opcode
) : NULL
;
636 struct operand_
**o_next
= &instr
->operands
;
638 for (x
= strtok_r(operand
, ",", &st
);
640 x
= strtok_r(NULL
, ",", &st
) ) {
641 *o_next
= malloc(3 + sizeof **o_next
); /* FIXME: handle this on the fly later */
643 if (*o_next
== NULL
) {
644 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
650 x
+= strspn(x
, " \t\n");
652 y
= x
+ strlen(x
) - 1;
653 while (strchr(" \t\n", *y
)) {
659 (*o_next
)->operand
= strdup(x
);
660 (*o_next
)->next
= NULL
;
661 o_next
= &((*o_next
)->next
);
670 /* try to generate bytecode for an instruction */
672 int instr_assemble_(struct label_list_
*ll
, struct instruction_
*i
, unsigned int allow_short_labels
) {
673 unsigned int nwu
= 0; /* number of words used */
674 unsigned int incomplete
= 0;
676 struct operand_
*o
= i
->operands
;
679 printf("%s: assembling ", __func__
);
680 instruction_print_(i
,1);
685 /* already assembled, nothing to do */
689 /* special case DAT */
690 if (strncasecmp(i
->opcode
, "DAT", 3) == 0) {
691 /* just dump operands into words, I guess */
692 fprintf(stderr
, "FIXME unhandled raw data\n");
693 /* count total length of data.. */
694 /* realloc instruction */
699 /* start with opcode bits */
700 bits
= opcode_bits_(i
->opcode
);
702 fprintf(stderr
, "unrecognized instruction '%s%s", i
->opcode
, i
->operands
? " " : "");
703 for (o
= i
->operands
; o
; o
= o
->next
)
704 fprintf(stderr
, " %s%s", o
->operand
, o
->next
? "," : "");
705 fprintf(stderr
, "'\n");
708 i
->instr_words
[0] |= 0x0f & bits
;
710 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
711 if ((bits
& 0x0f) == 0) {
712 bits
= nbi_opcode_bits_(i
->opcode
);
714 fprintf(stderr
, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
719 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
722 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ 1, &nwu
, allow_short_labels
);
724 fprintf(stderr
, "couldn't assemble instruction\n");
726 } else if (bits
== -2) {
727 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
728 /* keep going, but don't finalize until we can calculate label address */
734 i
->instr_words
[0] |= (bits
& 0x3f) << 4;
737 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
741 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ nwu
+ 1, &nwu
, allow_short_labels
);
743 fprintf(stderr
, "couldn't assemble instruction\n");
745 } else if (bits
== -2) {
746 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
747 /* keep going, but don't finalize until we can calculate label address */
752 i
->instr_words
[0] |= (bits
& 0x3f) << 10;
755 fprintf(stderr
, "too many operands\n");
759 /* counting labels as words, we now know at least the maximum instruction length */
763 DEBUG_PRINTF("instruction words: [%u]", i
->length
);
764 for (bits
= 0; bits
<= (int)nwu
; bits
++)
765 DEBUG_PRINTF(" %04x", i
->instr_words
[bits
]);
768 DEBUG_PRINTF(" (preliminary)");
779 * read lines from stream f
780 * break each line into parts, populate parts into structures
783 int parse_stream_(FILE *f
, const char *src
, struct instruction_list_
**il
, struct label_list_
**ll
, unsigned int allow_short_labels
) {
784 struct instruction_
*instr
, **instr_list_entry
;
785 unsigned int line
= 0;
789 buf
[sizeof buf
- 1] = '\0';
791 while (fgets(buf
, sizeof buf
, f
)) {
794 if (buf
[sizeof buf
- 1] != '\0') {
795 fprintf(stderr
, "%s:%u:%s", src
, line
, "input line too long\n");
800 if (buf_tokenize_(buf
, &instr
)) {
801 fprintf(stderr
, "%s:%u:%s", src
, line
, "trouble tokenizing input\n");
807 /* add to list of instructions */
808 if (instr_list_insert(il
, instr
)) {
809 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate instruction list\n");
811 instr_list_entry
= (*il
)->instr
+ (*il
)->entries
- 1;
814 if (label_list_find_instr(*ll
, instr
->label
)) {
815 fprintf(stderr
, "%s:%u:%s", src
, line
, "duplicate label\n");
818 if (label_list_insert(ll
, instr_list_entry
)) {
819 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate label list\n");
821 label_addr_calculate_(*il
, *ll
);
824 instr_assemble_(*ll
, instr
, allow_short_labels
);
828 fprintf(stderr
, "%s():%s\n", "fgets", strerror(errno
));
832 fprintf(stderr
, "parsing aborted\n");
840 * make a full pass over instruction list to resolve labels
843 int assemble_check_(struct instruction_list_
*il
, struct label_list_
*ll
, unsigned int allow_short_labels
) {
847 DEBUG_PRINTF(" final pass of assembler...\n");
848 for (x
= 0; x
< il
->entries
; x
++) {
849 retval
|= instr_assemble_(ll
, il
->instr
[x
], allow_short_labels
);
851 fprintf(stderr
, "instruction failed to assemble\n");
855 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
856 for (x
= 0; x
< ll
->entries
; x
++) {
857 if (! ll
->label
[x
].ready
)
860 printf("%3s0x%04x %-32s ",
861 ll
->label
[x
].ready
? "" : "*",
864 instruction_print_(*(ll
->label
[x
].instr
), 0);
869 VERBOSE_PRINTF("\n");
872 fprintf(stderr
, "some labels could not be resolved\n");
878 int output_(struct instruction_list_
*il
, const char *filename
) {
880 struct instruction_
*instr
;
881 size_t i
, r
, total_words
= 0;
885 of
= fopen(filename
, "w");
887 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
892 for (i
= 0; i
< il
->entries
; i
++) {
893 instr
= il
->instr
[i
];
897 s
= instruction_print_(instr
, 1);
898 printf("%*s;", (44 - s
) > 0 ? (44 - s
) : 0, "");
899 for (x
= 0; x
< instr
->length
; x
++) {
900 printf(" %04x", instr
->instr_words
[x
]);
906 r
= fwrite(instr
->instr_words
, sizeof(DCPU16_WORD
), instr
->length
, of
);
907 if (r
< instr
->length
) {
908 fprintf(stderr
, "%s():%s\n", "fwrite", strerror(errno
));
912 total_words
+= instr
->length
;
915 fprintf(stderr
, "%s 0x%04zx instructions as 0x%04zx words\n",
916 dryrun_
? "assembled" : "wrote",
923 static struct instruction_list_
*il_
;
924 static struct label_list_
*ll_
;
926 int main(int argc
, char *argv
[]) {
927 const char *out_filename
= NULL
;
928 unsigned int allow_short_labels
= 0;
931 while ( (c
= getopt(argc
, argv
, "dhsvo:")) != EOF
) {
938 allow_short_labels
++;
943 fprintf(stderr
, "Sorry, I can only write one file at a time.\n");
946 out_filename
= optarg
;
966 if (out_filename
== NULL
)
967 out_filename
= out_filename_default_
;
970 il_
= instr_list_new();
971 ll_
= label_list_new();
973 /* if filenames were specified, parse them instead of stdin */
976 char *filename
= *argv
;
977 FILE *f
= fopen(filename
, "r");
982 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
986 VERBOSE_PRINTF("assembling '%s'...\n", filename
);
987 parse_stream_(f
, filename
, &il_
, &ll_
, allow_short_labels
);
992 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
993 parse_stream_(stdin
, "-", &il_
, &ll_
, allow_short_labels
);
996 if (assemble_check_(il_
, ll_
, allow_short_labels
)) {
997 fprintf(stderr
, "errors prevented assembly\n");
1001 if (output_(il_
, out_filename
)) {
1002 fprintf(stderr
, "failed to create output\n");