12 * quick and dirty assembler for dcpu16
14 * Justin Wind <justin.wind@gmail.com>
15 * 2012 04 07 - implementation started
16 * 2012 04 10 - functional
19 * needs ability to specify location for code or data
/* Revision-control identification string for this source file. */
static const char * const src_id_ = "$Id$";

/* Output file name shown as the default for -o in the usage text. */
const char out_filename_default_[] = "a.out";

/* Verbosity level tested by DEBUG_PRINTF and VERBOSE_PRINTF. */
unsigned int verbose_ = 0;

/* Dry-run flag (the usage text describes -d as "do not write to file"). */
unsigned int dryrun_ = 0;
/* Forward the arguments to printf only when verbose_ is greater than 2. */
#define DEBUG_PRINTF(...) do { if (!(verbose_ > 2)) break; printf(__VA_ARGS__); } while (0)
/* Forward the arguments to printf whenever verbose_ is non-zero. */
#define VERBOSE_PRINTF(...) do { if (!verbose_) break; printf(__VA_ARGS__); } while (0)
/*
 * Print the help text.
 *
 * prog: program name; strrchr() locates its last '/' (the lines that use
 *       that result are not visible here -- presumably they trim the
 *       directory part; confirm).
 * full: non-zero sends the text to stdout, zero sends it to stderr.
 */
34 void usage_(char *prog
, unsigned int full
) {
35 FILE *f
= full
? stdout
: stderr
;
36 char *x
= strrchr(prog
, '/');
42 fprintf(f
, "%s -- \n\n",
/* NOTE(review): this synopsis omits -d, which the option list below documents. */
45 fprintf(f
, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
49 fprintf(f
, "\nOptions:\n"
50 "\t-h -- this screen\n"
51 "\t-o <file> -- output to <file> [default: %s]\n"
52 "\t-s -- allow short labels in instruction words\n"
53 "\t-d -- dry run, print results, do not write to file\n"
54 "\t-v -- verbose output\n",
55 out_filename_default_
);
57 fprintf(f
, "\n%78s\n",
63 /* maintain an array of the instructions we have parsed */
/*
 * Create an instruction list.
 * The block is requested with malloc(IL_SIZE(1024)) and the capacity of
 * 1024 is recorded in ->allocated. The stderr message names malloc and
 * strerror(errno); its guarding check is not visible in this excerpt.
 */
65 struct instruction_list_
*instr_list_new(void) {
66 size_t init_size
= 1024;
67 struct instruction_list_
*il
= malloc(IL_SIZE(init_size
));
69 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
72 il
->allocated
= init_size
;
/*
 * Store instruction pointer i at index (*il)->entries of the list.
 * The list is grown with realloc() in steps of 1024 entries; il is a
 * pointer to the list pointer, presumably so the caller's pointer can be
 * updated after a realloc (that assignment is not visible here).
 * A failed realloc is reported on stderr.
 */
78 int instr_list_insert(struct instruction_list_
**il
, struct instruction_
*i
) {
79 /* make room make room */
/*
 * NOTE(review): this test is true only once entries is already one larger
 * than allocated. If IL_SIZE(n) provides room for exactly n pointers, the
 * store at index entries further down has overrun the block before the
 * list is ever grown -- confirm against IL_SIZE.
 */
80 if ((*il
)->entries
- 1 == (*il
)->allocated
) {
81 size_t new_allocated
= (*il
)->allocated
+ 1024;
82 void *tmp_ptr
= realloc(*il
, IL_SIZE(new_allocated
));
83 if (tmp_ptr
== NULL
) {
84 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
88 (*il
)->allocated
= new_allocated
;
91 (*il
)->instr
[(*il
)->entries
] = i
;
96 /* also maintain a list of the labels we've seen, indexed back to their instructions. */
97 /* FIXME: ugh, this could all stand to be rewritten cleaner */
98 /* these lists could be rearranged to be a lot easier to wrangle and/or maybe use common interfaces */
99 /* they were thrown together on the fly */
/*
 * Create a label list.
 * The block is requested with malloc(LL_SIZE(256)) and the capacity of
 * 256 is recorded in ->allocated. The stderr message names malloc and
 * strerror(errno); its guarding check is not visible in this excerpt.
 */
101 struct label_list_
*label_list_new(void) {
102 size_t init_size
= 256;
103 struct label_list_
*ll
= malloc(LL_SIZE(init_size
));
105 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
108 ll
->allocated
= init_size
;
113 /* instr here is index into instruction list */
/*
 * Record the label of *instr in the label list.
 * The new entry keeps the instruction's own label pointer (no copy is
 * made) together with instr, the pointer to the instruction's slot.
 * The list is grown with realloc() in steps of 256 entries; a failed
 * realloc is reported on stderr.
 */
115 int label_list_insert(struct label_list_
**ll
, struct instruction_
**instr
) {
/*
 * NOTE(review): this test is true only once entries is already one larger
 * than allocated -- confirm the list cannot be overrun before it grows.
 */
116 if ((*ll
)->entries
- 1 == (*ll
)->allocated
) {
117 size_t new_allocated
= (*ll
)->allocated
+ 256;
/*
 * NOTE(review): the size below is computed with IL_SIZE, while
 * label_list_new() sizes this same list with LL_SIZE -- LL_SIZE looks
 * like the intended macro; confirm.
 */
118 void *tmp_ptr
= realloc(*ll
, IL_SIZE(new_allocated
));
119 if (tmp_ptr
== NULL
) {
120 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
124 (*ll
)->allocated
= new_allocated
;
127 DEBUG_PRINTF("TRACE: adding label '%s'\n", (*instr
)->label
);
129 (*ll
)->label
[(*ll
)->entries
].label
= (*instr
)->label
;
130 (*ll
)->label
[(*ll
)->entries
].instr
= instr
;
135 /* locate the index of a labelled instruction within the instruction list */
137 struct instruction_
**label_list_find_instr(struct label_list_
*ll
, char *label
) {
140 for (x
= 0; x
< ll
->entries
; x
++) {
141 if (strcmp(ll
->label
[x
].label
, label
) == 0)
142 return ll
->label
[x
].instr
;
147 /* look up the address of a calculated address */
/*
 * Look up `label` and, when the matching entry has ready == 1, copy its
 * address into *addr.
 * Callers in this file test the result with a plain if() and treat a
 * non-zero value as "address not available".
 */
149 int label_list_find_addr(struct label_list_
*ll
, char *label
, DCPU16_WORD
*addr
) {
152 for (x
= 0; x
< ll
->entries
; x
++) {
153 if (strcmp(ll
->label
[x
].label
, label
) == 0) {
/* only labels whose address has been calculated may be reported */
154 if (ll
->label
[x
].ready
== 1) {
155 *addr
= ll
->label
[x
].addr
;
163 /* attempt to determine the addresses of labels */
/*
 * Try to give every label in ll an address.
 * Each entry's ready flag is tested first (presumably to skip entries
 * already done). For the others the instruction list is walked backwards
 * from the label's own slot, adding up ->length of the instructions
 * passed; the total is stored as the label's addr and ready is set to 1.
 * When an instruction carrying a different label is met, its address is
 * requested through label_list_find_addr(); a failure there is reported
 * as an internal error.
 */
165 void label_addr_calculate_(struct instruction_list_
*il
, struct label_list_
*ll
) {
168 /* walk through labels */
169 for (i
= 0; i
< ll
->entries
; i
++) {
170 struct instruction_
**instr
;
171 unsigned int word_count
= 0;
173 if (ll
->label
[i
].ready
)
177 * walk backwards through the list of instructions
178 * until we get to the start or a known prior label address
/*
 * NOTE(review): if this loop runs all the way to the first slot, the final
 * instr-- forms a pointer before the start of il->instr, which the C
 * standard leaves undefined -- confirm, or count with an index instead.
 */
181 for (instr
= ll
->label
[i
].instr
; instr
>= il
->instr
; instr
--) {
183 word_count
+= (*instr
)->length
;
186 && strcmp((*instr
)->label
, ll
->label
[i
].label
)) {
189 if (label_list_find_addr(ll
, (*instr
)->label
, &addr
)) {
190 fprintf(stderr
, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
199 ll
->label
[i
].addr
= word_count
;
200 ll
->label
[i
].ready
= 1;
201 DEBUG_PRINTF("label '%s' has addr of 0x%04x\n", ll
->label
[i
].label
, word_count
);
/*
 * Tear down instruction i (per its name; the free() calls themselves are
 * not visible in this excerpt).
 * The operand list is consumed from the head: each pass takes the first
 * operand and advances i->operands to its successor until none is left.
 */
206 void instr_free_(struct instruction_
*i
) {
211 while (i
->operands
) {
212 struct operand_
*o
= i
->operands
;
214 i
->operands
= o
->next
;
221 /* generate the nibble for a given basic opcode */
/*
 * Look up a basic opcode mnemonic in the opcodes_lower_nibble table.
 * Entries are compared case-insensitively until the entry whose name is
 * empty; stopping on that entry means the mnemonic is unknown, which is
 * reported on stderr. The values returned are not visible in this excerpt.
 */
223 int opcode_bits_(char *opcode
) {
227 } opcodes_lower_nibble
[] = {
229 /* { "future nbi instruction", 0x00 }, */
248 for (o
= opcodes_lower_nibble
; o
->op
[0]; o
++) {
249 if (strcasecmp(o
->op
, opcode
) == 0)
253 if (o
->op
[0] == '\0') {
254 fprintf(stderr
, "unknown instruction '%s'\n", opcode
);
261 /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */
/*
 * Look up a non-basic (nbi) opcode mnemonic in the nbi_opcodes_bits table.
 * Entries are compared case-insensitively until the entry whose name is
 * empty; stopping on that entry means the mnemonic is unknown, which is
 * reported on stderr. The values returned are not visible in this excerpt.
 */
263 int nbi_opcode_bits_(char *nbi_opcode
) {
267 } nbi_opcodes_bits
[] = {
/* the placeholder name is a single space, so it is not taken for the end-of-table entry */
268 { " ", 0x00 }, /* reserved for future */
273 for (o
= nbi_opcodes_bits
; o
->op
[0]; o
++) {
274 if (strcasecmp(o
->op
, nbi_opcode
) == 0)
278 if (o
->op
[0] == '\0') {
/*
 * NOTE(review): o->op is the empty end-of-table name at this point, so the
 * message prints ''. opcode_bits_() prints its argument in the same
 * situation -- nbi_opcode looks like the intended value here.
 */
279 fprintf(stderr
, "unknown nbi instruction '%s'\n", o
->op
);
286 /* convert register character like 'x' to value like 0x03 */
/*
 * Translate a register letter into its number.
 * regs lists each register as an upper/lower-case pair and strchr() finds
 * r in it; the message below reports an unknown letter as an internal
 * error.
 * NOTE(review): strchr() also matches the terminating NUL, so r == '\0'
 * gives a non-NULL x -- confirm callers never pass it.
 */
288 unsigned int register_enumerate_(char r
) {
289 const char regs
[] = "AaBbCcXxYyZzIiJj";
290 const char *x
= strchr(regs
, r
);
295 fprintf(stderr
, "internal error, unknown register character 0x%02x\n", r
);
/*
 * Remove all occurrences of the characters in `chars` from `buf`.
 *
 * buf:   NUL-terminated string, compacted in place; the result is never
 *        longer than the input.
 * chars: NUL-terminated set of characters to drop.
 *
 * The read pointer advances exactly once per input character, so it can
 * never step past the terminator, even when buf ends in characters that
 * are being removed.
 */
void buf_strip_chars_(char *buf, char *chars) {
    char *src;
    char *dst = buf;

    for (src = buf; *src != '\0'; src++) {
        /* keep only characters that are not in the strip set */
        if (strchr(chars, *src) == NULL) {
            *dst++ = *src;
        }
    }

    *dst = '\0';
}
316 * generate the six bits for a given operand string
317 * returns -1 if it could not parse the operand
318 * returns -2 if it could not parse the operand due to an unresolved label
319 * notes: nextword may be overwritten even if it's not used in final instruction
/*
 * Classify one operand string and produce its operand bits.
 *
 * ll:                 label list used to resolve label operands.
 * operand_orig:       operand text; it is copied into a working buffer
 *                     before whitespace and brackets are removed, and is
 *                     quoted unchanged in error messages.
 * nextword:           receives the extra word (constant or label address)
 *                     for operand forms that carry one.
 * nextwordused:       NOTE(review): its updates are not visible in this
 *                     excerpt; instr_assemble_() passes &nwu here.
 * allow_short_labels: non-zero lets a label whose address is below 0x20
 *                     be encoded inline as (0x20 + address) & 0x3f.
 *
 * The working buffer is held in function-level statics and reused across
 * calls.
 */
322 int value_bits_(struct label_list_
*ll
, char *operand_orig
, DCPU16_WORD
*nextword
, unsigned int *nextwordused
, unsigned int allow_short_labels
) {
323 static char *operand
= NULL
;
324 static size_t operand_sz
= 0;
330 Our operand working buffer shouldn't ever need to be too big,
331 but DAT might blow that assumption.
/*
 * NOTE(review): new_sz starts as strlen(operand_orig), which has no room
 * for the terminator that the strcpy() below writes; confirm the lines not
 * visible here enlarge it before the realloc().
 */
333 if (operand_sz
<= strlen(operand_orig
)) {
335 size_t new_sz
= strlen(operand_orig
);
341 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__
, new_sz
);
342 tmp_ptr
= realloc(operand
, new_sz
);
343 if (tmp_ptr
== NULL
) {
344 fprintf(stderr
, "%s(%zu):%s\n", "realloc", new_sz
, strerror(errno
));
351 o
= strcpy(operand
, operand_orig
);
353 DEBUG_PRINTF("%s: operand '%s' ", __func__
, operand
); /* completed later */
355 /* this is a very stupid parser */
357 /* first, let's trim all whitespace out of string at once to make parsing easier */
358 buf_strip_chars_(operand
, " \t\n");
360 /* single character might match a register */
361 if (strlen(operand
) == 1
362 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
363 DEBUG_PRINTF("is register %c\n", *operand
);
364 return register_enumerate_(*operand
);
368 if (strcasecmp(operand
, "POP") == 0) {
369 DEBUG_PRINTF("is POP\n");
372 if (strcasecmp(operand
, "PUSH") == 0) {
373 DEBUG_PRINTF("is PUSH\n");
376 if (strcasecmp(operand
, "PEEK") == 0) {
377 DEBUG_PRINTF("is PEEK\n");
380 if (strcasecmp(operand
, "SP") == 0) {
381 DEBUG_PRINTF("is register SP\n");
384 if (strcasecmp(operand
, "PC") == 0) {
385 DEBUG_PRINTF("is register PC\n");
388 if (strcasecmp(operand
, "O") == 0) {
389 DEBUG_PRINTF("is register O\n");
393 /* is the operand [bracketed]? */
394 if (operand
[0] == '[' && operand
[strlen(operand
) - 1] == ']') {
395 /* eat the brackets */
396 operand
[strlen(operand
) - 1] = '\0';
399 /* is it [register]? */
400 if (strlen(operand
) == 1
401 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
402 DEBUG_PRINTF("is dereferenced register %c\n", *operand
);
403 return 0x08 | register_enumerate_(*operand
);
406 /* is it [register+something]? */
407 if ( (ep
= strchr(operand
, '+')) ) {
415 /* figure out which one is which */
417 && strchr("AaBbCcXxYyZzIiJj", *ep
)) {
420 } else if (strlen(operand
) == 1
421 && strchr("AaBbCcXxYyZzIiJj", *operand
) ) {
425 DEBUG_PRINTF("is unparsable\n");
426 fprintf(stderr
, "couldn't parse operand '%s'\n", operand_orig
);
430 /* check if something is understandable as a value */
432 l
= strtoul(constant
, &ep
, 0);
434 && (*constant
&& (*ep
== '\0')) ) {
435 /* string conversion went without issue */
436 /* validate it will fit in a word */
438 DEBUG_PRINTF("is out of range\n");
439 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
444 *nextword
= l
& 0xffff;
446 DEBUG_PRINTF("is a dereferenced register (%c) + constant (%hu)\n", *reg
, *nextword
);
447 return 0x10 | register_enumerate_(*reg
);
449 DEBUG_PRINTF("is out of range\n");
450 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
454 /* what? still here? assume it's a label, I guess */
455 /* try to populate nextword with label address */
456 if (label_list_find_addr(ll
, operand
, nextword
)) {
457 DEBUG_PRINTF("(deferred label resolution)\n");
461 DEBUG_PRINTF("is a dereferenced register (%c) + label\n", *reg
);
463 return 0x10 | register_enumerate_(*reg
);
466 /* it must just be a dereferenced literal then */
469 l
= strtoul(operand
, &ep
, 0);
471 && (*operand
&& (*ep
== '\0')) ) {
472 /* string conversion went without issue */
473 /* validate it will fit in a word */
475 DEBUG_PRINTF("is out of range\n");
476 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
/*
 * NOTE(review): the trace below prints *nextword before the following
 * statement assigns it, so it shows the previous contents, not this
 * literal.
 */
480 DEBUG_PRINTF("is a dereferenced literal value (%hu)\n", *nextword
);
481 *nextword
= l
& 0xffff;
485 DEBUG_PRINTF("is out of range\n");
486 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
489 /* not a number? try a label */
490 if (label_list_find_addr(ll
, operand
, nextword
)) {
491 DEBUG_PRINTF("(deferred label resolution)\n");
495 DEBUG_PRINTF("is a dereferenced label\n");
500 /* left with a literal or a label, then */
/*
 * NOTE(review): the end-of-conversion test after this strtoul() is joined
 * with ||, whereas the two parallel checks earlier in this function join
 * it with && -- confirm which is intended.
 */
503 l
= strtoul(operand
, &ep
, 0);
505 || (*operand
&& (*ep
== '\0')) ) {
507 DEBUG_PRINTF("is out of range\n");
508 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
512 DEBUG_PRINTF("is literal value (%lu)\n", l
);
517 *nextword
= l
& 0xffff;
522 /* try to populate nextword with label address */
523 if (label_list_find_addr(ll
, operand
, nextword
)) {
524 DEBUG_PRINTF("(deferred label resolution)\n");
525 /* assume non-small literal value */
530 DEBUG_PRINTF("is label '%s' (0x%02hx)\n", operand
, *nextword
);
531 if (*nextword
< 0x20 && allow_short_labels
) {
532 DEBUG_PRINTF("small value label win\n");
533 return (0x20 + *nextword
) & 0x3f;
/*
 * Print an instruction as text on stdout.
 * Two layouts exist: "%-16s %3s" (label, or "" when there is none, then
 * the opcode) and "%3s" (opcode only); presumably with_label selects
 * between them. The operands follow, separated by commas.
 * r accumulates the printf() return values; output_() uses this
 * function's result as the printed width when padding its listing.
 */
541 int instruction_print_(struct instruction_
*i
, unsigned int with_label
) {
546 r
= printf("%-16s %3s", i
->label
? i
->label
: "", i
->opcode
);
548 r
= printf("%3s", i
->opcode
);
550 for (o
= i
->operands
; o
; o
= o
->next
)
551 r
+= printf(" %s%s", o
->operand
, o
->next
? "," : "");
556 /* parse an instruction out of buf, create new instruction struct if seemingly valid */
557 /* does not actually check if instruction is valid yet */
558 /* buf must be 0-terminated */
/*
 * Split one source line into label, opcode and operands and build a new
 * instruction from them.
 *
 * buf:        NUL-terminated line; it is modified in place by strtok_r().
 * next_instr: must not be NULL (asserted). NOTE(review): the store through
 *             it is not visible in this excerpt.
 *
 * parse_stream_() treats a non-zero return as a tokenizing failure.
 */
560 int buf_tokenize_(char *buf
, struct instruction_
**next_instr
) {
/* NOTE(review): "const char const" repeats the qualifier; "const char * const" may have been meant. */
561 const char const *sep
= " \t\n";
562 struct instruction_
*instr
= NULL
;
572 assert(next_instr
!= NULL
);
/* a ';' is searched for first -- presumably the start of a comment to cut off; confirm */
577 if ((x
= strchr(buf
, ';')) != NULL
)
579 /* kill leading whitespace */
580 buf
+= strspn(buf
, " \t\n");
581 /* kill trailing whitespace */
583 x
= buf
+ strlen(buf
);
584 while (strchr(" \t\n", *x
)) {
590 if ((x
= strrchr(buf
, '\n')) != NULL
)
593 /* determine if first token is label, opcode, or we just have a blank line to ignore */
594 x
= strtok_r(buf
, sep
, &st
);
596 /* empty line? nothing to do here. */
601 /* labels end with :, otherwise it's an opcode */
602 y
= x
+ strlen(x
) - 1;
606 opcode
= strtok_r(NULL
, sep
, &st
);
608 #else /* OTHER_LABELS */
609 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
610 /* otherwise, it's an opcode */
613 opcode
= strtok_r(NULL
, sep
, &st
);
618 #endif /* OTHER_LABELS */
624 /* extra room for assembled words */
/*
 * NOTE(review): "3 +" adds three bytes, not three words; if the assembled
 * words live in a trailing array of DCPU16_WORD this is too small --
 * confirm against the struct definition.
 */
625 instr
= calloc(1, 3 + sizeof *instr
);
627 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
/* label and opcode are duplicated, so the instruction does not point into buf */
631 instr
->label
= label
? strdup(label
) : NULL
;
632 instr
->opcode
= opcode
? strdup(opcode
) : NULL
;
/* o_next always addresses the link that the next operand must be stored in */
635 struct operand_
**o_next
= &instr
->operands
;
637 for (x
= strtok_r(operand
, ",", &st
);
639 x
= strtok_r(NULL
, ",", &st
) ) {
640 *o_next
= malloc(3 + sizeof **o_next
); /* FIXME: handle this on the fly later */
/* NOTE(review): the allocation above is malloc(), but the message below names calloc. */
642 if (*o_next
== NULL
) {
643 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
649 x
+= strspn(x
, " \t\n");
651 y
= x
+ strlen(x
) - 1;
652 while (strchr(" \t\n", *y
)) {
658 (*o_next
)->operand
= strdup(x
);
659 (*o_next
)->next
= NULL
;
660 o_next
= &((*o_next
)->next
);
669 /* try to generate bytecode for an instruction */
/*
 * Try to encode instruction i into i->instr_words.
 * The opcode bits go into the low four bits of word 0; the first
 * operand's bits are placed at bit 4 and the second operand's at bit 10.
 * Words produced by value_bits_() for the operands are written after
 * word 0. A result of -2 from value_bits_() (unresolved label) defers the
 * assembly instead of failing it.
 *
 * ll:                 label list handed to value_bits_().
 * allow_short_labels: passed through to value_bits_().
 */
671 int instr_assemble_(struct label_list_
*ll
, struct instruction_
*i
, unsigned int allow_short_labels
) {
672 unsigned int nwu
= 0; /* number of words used */
673 unsigned int incomplete
= 0;
675 struct operand_
*o
= i
->operands
;
678 printf("%s: assembling ", __func__
);
679 instruction_print_(i
,1);
684 /* already assembled, nothing to do */
688 /* special case DAT */
689 if (strncasecmp(i
->opcode
, "DAT", 3) == 0) {
690 /* just dump operands into words, I guess */
691 fprintf(stderr
, "FIXME unhandled raw data\n");
692 /* count total length of data.. */
693 /* realloc instruction */
698 /* start with opcode bits */
699 bits
= opcode_bits_(i
->opcode
);
701 fprintf(stderr
, "unrecognized instruction '%s%s", i
->opcode
, i
->operands
? " " : "");
702 for (o
= i
->operands
; o
; o
= o
->next
)
703 fprintf(stderr
, " %s%s", o
->operand
, o
->next
? "," : "");
704 fprintf(stderr
, "'\n");
707 i
->instr_words
[0] |= 0x0f & bits
;
709 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
710 if ((bits
& 0x0f) == 0) {
711 bits
= nbi_opcode_bits_(i
->opcode
);
713 fprintf(stderr
, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
718 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
/* the first operand's extra word, if any, goes into the slot right after word 0 */
721 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ 1, &nwu
, allow_short_labels
);
723 fprintf(stderr
, "couldn't assemble instruction\n");
725 } else if (bits
== -2) {
726 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
727 /* keep going, but don't finalize until we can calculate label address */
733 i
->instr_words
[0] |= (bits
& 0x3f) << 4;
736 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
/* this operand's extra word goes after the nwu words already counted */
740 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ nwu
+ 1, &nwu
, allow_short_labels
);
742 fprintf(stderr
, "couldn't assemble instruction\n");
744 } else if (bits
== -2) {
745 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
746 /* keep going, but don't finalize until we can calculate label address */
751 i
->instr_words
[0] |= (bits
& 0x3f) << 10;
754 fprintf(stderr
, "too many operands\n");
758 /* counting labels as words, we now know at least the maximum instruction length */
762 DEBUG_PRINTF("instruction words: [%u]", i
->length
);
/* bits is reused here as the index for dumping words 0..nwu */
763 for (bits
= 0; bits
<= (int)nwu
; bits
++)
764 DEBUG_PRINTF(" %04x", i
->instr_words
[bits
]);
767 DEBUG_PRINTF(" (preliminary)");
778 * read lines from stream f
779 * break each line into parts, populate parts into structures
/*
 * Read source lines from f and feed them to the assembler.
 * Each line is tokenized into an instruction, appended to *il, checked
 * against *ll for a duplicate label and recorded there, and given a first
 * assembly attempt.
 *
 * src:                name used, with the line number, as the prefix of
 *                     error messages.
 * il, ll:             passed by reference because the insert functions
 *                     take the address of the list pointer.
 * allow_short_labels: passed through to instr_assemble_().
 */
782 int parse_stream_(FILE *f
, const char *src
, struct instruction_list_
**il
, struct label_list_
**ll
, unsigned int allow_short_labels
) {
783 struct instruction_
*instr
, **instr_list_entry
;
784 unsigned int line
= 0;
/* the last byte of buf acts as a sentinel: it is found changed once fgets() has filled the whole buffer */
788 buf
[sizeof buf
- 1] = '\0';
790 while (fgets(buf
, sizeof buf
, f
)) {
793 if (buf
[sizeof buf
- 1] != '\0') {
794 fprintf(stderr
, "%s:%u:%s", src
, line
, "input line too long\n");
799 if (buf_tokenize_(buf
, &instr
)) {
800 fprintf(stderr
, "%s:%u:%s", src
, line
, "trouble tokenizing input\n");
806 /* add to list of instructions */
807 if (instr_list_insert(il
, instr
)) {
808 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate instruction list\n");
/*
 * NOTE(review): instr_list_entry points into the (*il)->instr array and is
 * kept by the label list. instr_list_insert() can realloc() *il, which may
 * move that array and leave earlier label entries dangling -- confirm, or
 * store indexes instead of pointers.
 */
810 instr_list_entry
= (*il
)->instr
+ (*il
)->entries
- 1;
813 if (label_list_find_instr(*ll
, instr
->label
)) {
814 fprintf(stderr
, "%s:%u:%s", src
, line
, "duplicate label\n");
817 if (label_list_insert(ll
, instr_list_entry
)) {
818 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate label list\n");
820 label_addr_calculate_(*il
, *ll
);
823 instr_assemble_(*ll
, instr
, allow_short_labels
);
827 fprintf(stderr
, "%s():%s\n", "fgets", strerror(errno
));
831 fprintf(stderr
, "parsing aborted\n");
839 * make a full pass over instruction list to resolve labels
/*
 * Final pass over the whole program.
 * instr_assemble_() is run again on every instruction in il, with the
 * results OR-ed into retval. The label list is then walked and a table of
 * addresses and labels is printed; a label that is still not ready is
 * marked with '*'. A message on stderr reports unresolved labels.
 */
842 int assemble_check_(struct instruction_list_
*il
, struct label_list_
*ll
, unsigned int allow_short_labels
) {
846 DEBUG_PRINTF(" final pass of assembler...\n");
847 for (x
= 0; x
< il
->entries
; x
++) {
848 retval
|= instr_assemble_(ll
, il
->instr
[x
], allow_short_labels
);
850 fprintf(stderr
, "instruction failed to assemble\n");
854 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
855 for (x
= 0; x
< ll
->entries
; x
++) {
856 if (! ll
->label
[x
].ready
)
859 printf("%3s0x%04x %-32s ",
860 ll
->label
[x
].ready
? "" : "*",
863 instruction_print_(*(ll
->label
[x
].instr
), 0);
868 VERBOSE_PRINTF("\n");
871 fprintf(stderr
, "some labels could not be resolved\n");
/*
 * Emit the assembled words of every instruction in il.
 * filename is opened with fopen() and each instruction's instr_words are
 * written with fwrite(). A listing line (the instruction text padded to
 * 44 columns, ';', then the words in hex) is printed on stdout, and a
 * summary of instructions and words goes to stderr, worded "assembled"
 * for a dry run and "wrote" otherwise. The conditions guarding these
 * steps are not visible in this excerpt.
 */
877 int output_(struct instruction_list_
*il
, const char *filename
) {
879 struct instruction_
*instr
;
880 size_t i
, r
, total_words
= 0;
/* NOTE(review): the data written is binary; "wb" would avoid newline translation on platforms that distinguish text mode. */
884 of
= fopen(filename
, "w");
886 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
891 for (i
= 0; i
< il
->entries
; i
++) {
892 instr
= il
->instr
[i
];
896 s
= instruction_print_(instr
, 1);
897 printf("%*s;", (44 - s
) > 0 ? (44 - s
) : 0, "");
898 for (x
= 0; x
< instr
->length
; x
++) {
899 printf(" %04x", instr
->instr_words
[x
]);
/* NOTE(review): words are written exactly as stored in memory, i.e. in the host's byte order. */
905 r
= fwrite(instr
->instr_words
, sizeof(DCPU16_WORD
), instr
->length
, of
);
906 if (r
< instr
->length
) {
907 fprintf(stderr
, "%s():%s\n", "fwrite", strerror(errno
));
911 total_words
+= instr
->length
;
914 fprintf(stderr
, "%s 0x%04zx instructions as 0x%04zx words\n",
915 dryrun_
? "assembled" : "wrote",
/* Instruction list created in main() and handed to the parse, check and output steps. */
static struct instruction_list_ *il_ = NULL;
/* Label list created in main() and handed to the parse and check steps. */
static struct label_list_ *ll_ = NULL;
/*
 * Entry point.
 * Options are parsed with getopt() using "dhsvo:"; -s raises
 * allow_short_labels and -o sets the output file name, which falls back
 * to out_filename_default_. The instruction and label lists are created,
 * every named file (or stdin) is parsed, the final assembly pass is run
 * and the result is handed to output_().
 */
925 int main(int argc
, char *argv
[]) {
926 const char *out_filename
= NULL
;
927 unsigned int allow_short_labels
= 0;
/* NOTE(review): POSIX specifies that getopt() returns -1 at the end of the options; EOF is only conventionally the same value. */
930 while ( (c
= getopt(argc
, argv
, "dhsvo:")) != EOF
) {
937 allow_short_labels
++;
942 fprintf(stderr
, "Sorry, I can only write one file at a time.\n");
945 out_filename
= optarg
;
965 if (out_filename
== NULL
)
966 out_filename
= out_filename_default_
;
969 il_
= instr_list_new();
970 ll_
= label_list_new();
972 /* if filenames were specified, parse them instead of stdin */
975 char *filename
= *argv
;
976 FILE *f
= fopen(filename
, "r");
981 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
985 VERBOSE_PRINTF("assembling '%s'...\n", filename
);
986 parse_stream_(f
, filename
, &il_
, &ll_
, allow_short_labels
);
991 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
992 parse_stream_(stdin
, "-", &il_
, &ll_
, allow_short_labels
);
995 if (assemble_check_(il_
, ll_
, allow_short_labels
)) {
996 fprintf(stderr
, "errors prevented assembly\n");
1000 if (output_(il_
, out_filename
)) {
1001 fprintf(stderr
, "failed to create output\n");