12 * quick and dirty assembler for dcpu16
/* Identification string; holds the literal $Id$ marker. */
16 static const char * const src_id_
= "$Id$";
/*
 * Default output file name. It is shown in the usage text and used by main
 * when no output file name has been stored.
 * NOTE(review): const is written twice on the element type; one is redundant.
 */
18 const char const out_filename_default_
[] = "a.out";
/* Verbosity level read by the VERBOSE_NOTIFY and DEBUG_NOTIFY macros below. */
19 unsigned int verbose_
= 0;
/* Dry-run flag; presumably set by the -d option (not visible here) -- TODO confirm. */
20 unsigned int dryrun_
= 0;
/* printf-style trace to stderr, emitted only when verbose_ is greater than 2. */
23 #define DEBUG_NOTIFY(...) do { if (verbose_ > 2) fprintf(stderr, __VA_ARGS__); } while (0)
/* printf-style message to stdout, emitted whenever verbose_ is nonzero. */
24 #define VERBOSE_NOTIFY(...) do { if (verbose_) printf(__VA_ARGS__); } while (0)
/*
 * Print the program help text.
 *
 * prog: program path; strrchr searches it for the last slash. How the result
 *       is used is not visible in this view.
 * full: when nonzero the text is written to stdout, otherwise to stderr.
 *
 * Several argument lines of the fprintf calls below are not visible here.
 */
28 void usage_(char *prog
, unsigned int full
) {
29 FILE *f
= full
? stdout
: stderr
;
30 char *x
= strrchr(prog
, '/');
36 fprintf(f
, "%s -- \n\n",
/*
 * NOTE(review): the synopsis lists -h, -v, -s and -o only, while the option
 * table below and the getopt string in main also accept -d.
 */
39 fprintf(f
, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
43 fprintf(f
, "\nOptions:\n"
44 "\t-h -- this screen\n"
45 "\t-o <file> -- output to <file> [default: %s]\n"
46 "\t-s -- allow short labels in instruction words\n"
47 "\t-d -- dry run, print results, do not write to file\n"
48 "\t-v -- verbose output\n",
49 out_filename_default_
);
51 fprintf(f
, "\n%78s\n",
57 /* maintain an array of the instructions we have parsed */
/*
 * Allocate a new instruction list with room for 1024 entries, sized through
 * the IL_SIZE macro, and record that capacity in the allocated field.
 *
 * A malloc failure is reported on stderr with strerror(errno); the guarding
 * test, the initialisation of any other fields and the return statements are
 * not visible in this view.
 */
59 struct instruction_list_
*instr_list_new(void) {
60 size_t init_size
= 1024;
61 struct instruction_list_
*il
= malloc(IL_SIZE(init_size
));
63 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
66 il
->allocated
= init_size
;
/*
 * Store instruction i at index entries of the list *il, growing the
 * allocation by 1024 entries with realloc when the capacity test fires.
 *
 * il: address of the list pointer; presumably updated with the reallocated
 *     block (that assignment is not visible in this view).
 * i:  instruction pointer stored into the instr array as-is.
 *
 * A realloc failure is reported on stderr. Return values and the update of
 * entries are not visible here; callers treat a nonzero return as failure.
 *
 * NOTE(review): the capacity test compares entries - 1 with allocated, so it
 * first fires when entries equals allocated + 1. Assuming entries grows by one
 * per insert, the store at instr[entries] has by then already happened with
 * entries equal to allocated, which is one slot past the capacity unless
 * IL_SIZE reserves spare room -- confirm. Comparing entries with allocated
 * directly looks like the intent.
 */
72 int instr_list_insert(struct instruction_list_
**il
, struct instruction_
*i
) {
73 /* make room make room */
74 if ((*il
)->entries
- 1 == (*il
)->allocated
) {
75 size_t new_allocated
= (*il
)->allocated
+ 1024;
76 void *tmp_ptr
= realloc(*il
, IL_SIZE(new_allocated
));
77 if (tmp_ptr
== NULL
) {
78 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
82 (*il
)->allocated
= new_allocated
;
85 (*il
)->instr
[(*il
)->entries
] = i
;
90 /* also maintain a list of the labels we've seen, indexed back to their instructions. */
91 /* FIXME: ugh, this could all stand to be rewritten cleaner */
92 /* these lists could be rearranged to be a lot easier to wrangle and/or maybe use common interfaces */
93 /* they were thrown together on the fly */
/*
 * Allocate a new label list with room for 256 entries, sized through the
 * LL_SIZE macro, and record that capacity in the allocated field.
 *
 * A malloc failure is reported on stderr with strerror(errno); the guarding
 * test, other field initialisation and the return statements are not visible
 * in this view.
 */
95 struct label_list_
*label_list_new(void) {
96 size_t init_size
= 256;
97 struct label_list_
*ll
= malloc(LL_SIZE(init_size
));
99 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
102 ll
->allocated
= init_size
;
107 /* instr here is index into instruction list */
/*
 * Record a label in the list *ll, growing the allocation by 256 entries with
 * realloc when the capacity test fires.
 *
 * ll:    address of the list pointer; presumably updated with the reallocated
 *        block (that assignment is not visible in this view).
 * instr: pointer to a slot holding the labelled instruction. The label string
 *        pointer of that instruction and the slot pointer itself are stored,
 *        not copied.
 *
 * Return values and the update of entries are not visible here; callers
 * treat a nonzero return as failure.
 *
 * NOTE(review): same capacity test as instr_list_insert: entries - 1 compared
 * with allocated fires one insert late unless LL_SIZE reserves spare room.
 * NOTE(review): parse_stream_ passes a pointer into the instr array of the
 * instruction list. If that array lives inside the block instr_list_insert
 * reallocs, a later growth can move it and leave the stored pointer dangling
 * -- verify against the struct definition.
 */
109 int label_list_insert(struct label_list_
**ll
, struct instruction_
**instr
) {
110 if ((*ll
)->entries
- 1 == (*ll
)->allocated
) {
111 size_t new_allocated
= (*ll
)->allocated
+ 256;
/*
 * NOTE(review): the new size is computed with IL_SIZE, while label_list_new
 * sizes this same list with LL_SIZE. This looks like a copy-paste slip that
 * can under-allocate if label entries are larger -- confirm and use LL_SIZE.
 */
112 void *tmp_ptr
= realloc(*ll
, IL_SIZE(new_allocated
));
113 if (tmp_ptr
== NULL
) {
114 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
118 (*ll
)->allocated
= new_allocated
;
121 DEBUG_NOTIFY("TRACE: adding label '%s'\n", (*instr
)->label
);
123 (*ll
)->label
[(*ll
)->entries
].label
= (*instr
)->label
;
124 (*ll
)->label
[(*ll
)->entries
].instr
= instr
;
129 /* locate the index of a labelled instruction within the instruction list */
/*
 * Linear search of ll for an entry whose label string equals label under
 * strcmp; the stored instr pointer of the first match is returned.
 * The result for a label that is not found is not visible in this view;
 * parse_stream_ treats a non-null result as a duplicate label.
 *
 * label is handed straight to strcmp, so it must not be NULL.
 */
131 struct instruction_
**label_list_find_instr(struct label_list_
*ll
, char *label
) {
134 for (x
= 0; x
< ll
->entries
; x
++) {
135 if (strcmp(ll
->label
[x
].label
, label
) == 0)
136 return ll
->label
[x
].instr
;
141 /* look up the address of a calculated address */
/*
 * Linear search of ll for an entry whose label string equals label under
 * strcmp. When the matching entry has ready equal to 1, its addr field is
 * copied into *addr.
 *
 * Return values are not visible in this view; callers treat a nonzero
 * result as address not available.
 */
143 int label_list_find_addr(struct label_list_
*ll
, char *label
, DCPU16_WORD
*addr
) {
146 for (x
= 0; x
< ll
->entries
; x
++) {
147 if (strcmp(ll
->label
[x
].label
, label
) == 0) {
148 if (ll
->label
[x
].ready
== 1) {
149 *addr
= ll
->label
[x
].addr
;
157 /* attempt to determine the addresses of labels */
/*
 * For each label in ll, walk backwards from the slot of its instruction
 * towards the start of the instr array of il, summing the length field of
 * each instruction into word_count. Instructions carrying a different label
 * have their address looked up through label_list_find_addr. The total is
 * then stored as the address of the label and its ready flag is set to 1.
 *
 * Entries whose ready flag is already set are tested first; the action taken
 * for them, and what happens with a found prior address, are not visible in
 * this view.
 */
159 void label_addr_calculate_(struct instruction_list_
*il
, struct label_list_
*ll
) {
162 /* walk through labels */
163 for (i
= 0; i
< ll
->entries
; i
++) {
164 struct instruction_
**instr
;
165 unsigned int word_count
= 0;
167 if (ll
->label
[i
].ready
)
171 * walk backwards through the list of instructions
172 * until we get to the start or a known prior label address
/*
 * NOTE(review): when the walk reaches the first slot, the final decrement
 * forms a pointer one before the start of the array prior to the comparison,
 * which is outside what C pointer arithmetic defines. An index-based loop
 * would avoid that.
 */
175 for (instr
= ll
->label
[i
].instr
; instr
>= il
->instr
; instr
--) {
177 word_count
+= (*instr
)->length
;
180 && strcmp((*instr
)->label
, ll
->label
[i
].label
)) {
183 if (label_list_find_addr(ll
, (*instr
)->label
, &addr
)) {
184 fprintf(stderr
, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
193 ll
->label
[i
].addr
= word_count
;
194 ll
->label
[i
].ready
= 1;
195 DEBUG_NOTIFY("label '%s' has addr of 0x%04x\n", ll
->label
[i
].label
, word_count
);
/*
 * Release an instruction. The visible part unlinks the operand list one node
 * at a time: the head node is taken into o and the list head is advanced to
 * the next node. Which members are actually freed is not visible in this
 * view.
 */
200 void instr_free_(struct instruction_
*i
) {
205 while (i
->operands
) {
206 struct operand_
*o
= i
->operands
;
208 i
->operands
= o
->next
;
215 /* generate the nibble for a given basic opcode */
/*
 * Look opcode up in a local table with a case-insensitive compare. The scan
 * stops at the first entry whose op string is empty, which acts as the end
 * marker; reaching it prints an unknown-instruction message on stderr.
 *
 * The table contents and the return statements are not visible in this view.
 */
217 int opcode_bits_(char *opcode
) {
221 } opcodes_lower_nibble
[] = {
223 /* { "future nbi instruction", 0x00 }, */
242 for (o
= opcodes_lower_nibble
; o
->op
[0]; o
++) {
243 if (strcasecmp(o
->op
, opcode
) == 0)
247 if (o
->op
[0] == '\0') {
248 fprintf(stderr
, "unknown instruction '%s'\n", opcode
);
255 /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */
/*
 * Look nbi_opcode up in a local table with a case-insensitive compare. The
 * scan stops at the first entry whose op string is empty. The first table
 * entry is a single space, so it does not end the scan.
 *
 * The remaining table contents and the return statements are not visible in
 * this view.
 */
257 int nbi_opcode_bits_(char *nbi_opcode
) {
261 } nbi_opcodes_bits
[] = {
262 { " ", 0x00 }, /* reserved for future */
267 for (o
= nbi_opcodes_bits
; o
->op
[0]; o
++) {
268 if (strcasecmp(o
->op
, nbi_opcode
) == 0)
272 if (o
->op
[0] == '\0') {
/*
 * NOTE(review): this branch runs only when o points at the empty end marker,
 * so printing the op of o always shows an empty name. opcode_bits_ prints its
 * argument in the same situation; nbi_opcode looks like the intended value.
 */
273 fprintf(stderr
, "unknown nbi instruction '%s'\n", o
->op
);
280 /* convert register character like 'x' to value like 0x03 */
/*
 * Locate r in a string that lists each register letter in upper then lower
 * case (A, B, C, X, Y, Z, I, J). How the position is turned into the result
 * is not visible in this view; an unknown character is reported on stderr as
 * an internal error with its hex code.
 *
 * NOTE(review): strchr also matches the terminating NUL of the string, so a
 * zero r would be found rather than rejected -- confirm callers never pass it.
 */
282 unsigned int register_enumerate_(char r
) {
283 const char regs
[] = "AaBbCcXxYyZzIiJj";
284 const char *x
= strchr(regs
, r
);
289 fprintf(stderr
, "internal error, unknown register character 0x%02x\n", r
);
293 /* generate the six bits for a given operand */
294 /* FIXME: MAEK BETTR */
295 /* notes: nextword may be rewritten even if it's not used in final instruction */
/*
 * Classify one operand string and produce its operand bits.
 *
 * ll:                 label list used to resolve label operands.
 * operand_orig:       operand text; it is duplicated with strdup and only
 *                     the copy is modified.
 * nextword:           receives the low 16 bits of a literal, an offset or a
 *                     label address.
 * nextwordused:       updated on paths that are not visible in this view.
 * allow_short_labels: when nonzero, a resolved label address below 0x20 is
 *                     encoded inline as 0x20 + address.
 *
 * Forms tested, in order: a single register letter; a bracketed expression
 * (register, register plus constant, or literal); the keywords POP, PUSH,
 * PEEK, SP, PC and O; a numeric literal; otherwise a label.
 *
 * Many lines, including all returns and the release of the strdup copy, are
 * not visible here. Whether the strdup result is checked is also not visible.
 */
297 int value_bits_(struct label_list_
*ll
, char *operand_orig
, DCPU16_WORD
*nextword
, unsigned int *nextwordused
, unsigned int allow_short_labels
) {
/* NOTE(review): retval is unsigned but starts at -1, and the function returns int. */
298 unsigned int retval
= -1;
300 char *operand
, *o
, *ep
;
302 operand
= o
= strdup(operand_orig
);
304 DEBUG_NOTIFY("TRACE: operand '%s' is ", operand
);
306 if (strlen(operand
) == 1) {
307 if ( (strchr("ABCXYZIJ", *operand
))
308 || (strchr("abcxyzij", *operand
)) ) {
309 DEBUG_NOTIFY("register\n");
310 retval
= register_enumerate_(*operand
);
315 if (operand
[0] == '[' && operand
[strlen(operand
) - 1] == ']') {
316 operand
[strlen(operand
) - 1] = '\0';
318 /* trim whitespaces */
/*
 * NOTE(review): strchr also matches the terminating NUL, so if only blanks
 * remain this loop keeps advancing past the end of the string. Testing for
 * the terminator first would bound it.
 */
319 while (strchr(" \t\n", *operand
)) operand
++;
320 ep
= operand
+ strlen(operand
) - 1;
322 if (strlen(operand
) == 1) {
323 DEBUG_NOTIFY("dereferenced register\n");
324 retval
= 0x08 | register_enumerate_(*operand
);
328 if ( (ep
= strchr(operand
, '+')) ) {
/* NOTE(review): same strchr and NUL concern as the trim loop above; the loop body is not visible. */
332 while (strchr("+ \t\n", *ep
)) {
336 if (strlen(ep
) == 1) {
339 } else if (strlen(operand
) == 1) {
343 fprintf(stderr
, "couldn't parse operand\n");
347 if ( strchr("ABCXYZIJ", reg
)
348 || strchr("abcxyzij", reg
) ) {
349 l
= strtoul(constant
, &ep
, 0);
352 DEBUG_NOTIFY("dereferenced register+constant\n");
/*
 * NOTE(review): the format expects an index for %u and a character for %c,
 * but the arguments are the register character followed by its index; they
 * look swapped -- confirm against the declaration of reg.
 */
354 DEBUG_NOTIFY("\tregister_index:%u %c\n", reg
, register_enumerate_(reg
));
355 DEBUG_NOTIFY("\tconstant:%lu\n", l
);
357 *nextword
= l
& 0xffff;
359 retval
= 0x10 | register_enumerate_(reg
);
363 l
= strtoul(operand
, &ep
, 0);
364 DEBUG_NOTIFY("dereferenced literal value %lu...\n", l
);
365 *nextword
= l
& 0xffff;
371 if (strcasecmp(operand
, "POP") == 0) {
372 DEBUG_NOTIFY("POP\n");
376 if (strcasecmp(operand
, "PUSH") == 0) {
377 DEBUG_NOTIFY("PUSH\n");
381 if (strcasecmp(operand
, "PEEK") == 0) {
382 DEBUG_NOTIFY("PEEK\n");
386 if (strcasecmp(operand
, "SP") == 0) {
387 DEBUG_NOTIFY("sp register\n");
391 if (strcasecmp(operand
, "PC") == 0) {
392 DEBUG_NOTIFY("pc register\n");
396 if (strcasecmp(operand
, "O") == 0) {
397 DEBUG_NOTIFY("o register\n");
402 l
= strtoul(operand
, &ep
, 0);
/*
 * NOTE(review): operand is a pointer that has already been dereferenced
 * above, so the first half of this test is always true. An empty operand
 * would therefore be accepted as the literal 0; testing the first character
 * of operand looks like the intent -- confirm.
 */
404 if (operand
&& *ep
== '\0') {
405 DEBUG_NOTIFY("literal value %lu...\n", l
);
410 *nextword
= l
& 0xffff;
417 /* try to populate nextword with label address */
418 if (label_list_find_addr(ll
, operand
, nextword
)) {
419 DEBUG_NOTIFY("currently-unknown label...\n");
420 /* assume non-small literal value */
425 DEBUG_NOTIFY("label '%s' 0x%02hx\n", operand
, *nextword
);
426 if (*nextword
< 0x20 && allow_short_labels
) {
427 DEBUG_NOTIFY("small value label win\n");
428 retval
= (0x20 + *nextword
) & 0x3f;
/*
 * Print one instruction to stdout: either the label (empty when absent)
 * padded to 16 columns followed by the opcode, or the opcode alone, then each
 * operand separated by commas.
 *
 * The printf return values are accumulated in r, presumably so the printed
 * width can be returned; the branch on with_label and the return statement
 * are not visible in this view.
 */
441 int instruction_print_(struct instruction_
*i
, unsigned int with_label
) {
446 r
= printf("%-16s %3s", i
->label
? i
->label
: "", i
->opcode
);
448 r
= printf("%3s", i
->opcode
);
450 for (o
= i
->operands
; o
; o
= o
->next
)
451 r
+= printf(" %s%s", o
->operand
, o
->next
? "," : "");
456 /* parse an instruction out of buf, create new instruction struct if seemingly valid */
457 /* does not actually check if instruction is valid yet */
458 /* buf must be 0-terminated */
/*
 * Split one source line into label, opcode and operands.
 *
 * buf:        line text; it is modified in place by the trimming code and by
 *             strtok_r.
 * next_instr: output slot, asserted non-NULL. The store into it is not
 *             visible in this view.
 *
 * Visible steps: look for a semicolon (the action taken is not visible),
 * trim leading and trailing blanks, take the first token and decide whether
 * it is a label or an opcode (two variants selected by OTHER_LABELS), then
 * allocate the instruction, duplicate label and opcode, and build a linked
 * list of comma-separated operands with blanks trimmed from each.
 *
 * Return values are not visible here; the caller treats nonzero as failure.
 */
460 int buf_tokenize_(char *buf
, struct instruction_
**next_instr
) {
/*
 * NOTE(review): const is written twice on the pointee, so the pointer itself
 * is not const; one qualifier is redundant.
 */
461 const char const *sep
= " \t\n";
462 struct instruction_
*instr
= NULL
;
472 assert(next_instr
!= NULL
);
477 if ((x
= strchr(buf
, ';')) != NULL
)
479 /* kill leading whitespace */
480 buf
+= strspn(buf
, " \t\n");
481 /* kill trailing whitespace */
/* x starts on the terminating NUL, which strchr also matches; the loop body is not visible. */
483 x
= buf
+ strlen(buf
);
484 while (strchr(" \t\n", *x
)) {
490 if ((x
= strrchr(buf
, '\n')) != NULL
)
493 /* determine if first token is label, opcode, or we just have a blank line to ignore */
494 x
= strtok_r(buf
, sep
, &st
);
496 /* empty line? nothing to do here. */
501 /* labels end with :, otherwise its an opcode */
502 y
= x
+ strlen(x
) - 1;
506 opcode
= strtok_r(NULL
, sep
, &st
);
508 #else /* OTHER_LABELS */
509 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
510 /* otherwise, it's an opcode */
513 opcode
= strtok_r(NULL
, sep
, &st
);
518 #endif /* OTHER_LABELS */
524 /* extra room for assembled words */
/*
 * NOTE(review): the extra room is 3 bytes, not 3 words. If the trailing
 * storage holds DCPU16_WORD values, the size probably needs scaling by the
 * word size -- confirm against the struct definition.
 */
525 instr
= calloc(1, 3 + sizeof *instr
);
527 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
/* label and opcode are copied; either field is NULL when the token is absent. */
531 instr
->label
= label
? strdup(label
) : NULL
;
532 instr
->opcode
= opcode
? strdup(opcode
) : NULL
;
535 struct operand_
**o_next
= &instr
->operands
;
537 for (x
= strtok_r(operand
, ",", &st
);
539 x
= strtok_r(NULL
, ",", &st
) ) {
540 *o_next
= malloc(3 + sizeof **o_next
); /* FIXME: handle this on the fly later */
542 if (*o_next
== NULL
) {
/* NOTE(review): the failing call above is malloc, but the message names calloc. */
543 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
549 x
+= strspn(x
, " \t\n");
/* NOTE(review): for an empty operand, y would point one before x -- confirm empty tokens cannot reach here. */
551 y
= x
+ strlen(x
) - 1;
552 while (strchr(" \t\n", *y
)) {
558 (*o_next
)->operand
= strdup(x
);
559 (*o_next
)->next
= NULL
;
560 o_next
= &((*o_next
)->next
);
569 /* try to generate bytecode for an instruction */
/*
 * Assemble the words of instruction i.
 *
 * ll:                 label list passed through to value_bits_.
 * i:                  instruction whose instr_words are filled in.
 * allow_short_labels: passed through to value_bits_.
 *
 * Visible steps: the low 4 bits of word 0 come from opcode_bits_. When those
 * bits are zero, the opcode is looked up again with nbi_opcode_bits_. The
 * result of value_bits_ for the first operand is placed at bit 4 and for the
 * next operand at bit 10, with extra words written from index 1 onwards.
 * Unresolved labels are traced and the result is marked preliminary.
 *
 * Many lines, including the already-codified guard, operand advancing and
 * the length computation, are not visible in this view.
 *
 * NOTE(review): word 0 is built with OR-assignments, and this function is
 * called from both parse_stream_ and assemble_check_. The result is only
 * correct if word 0 is cleared or the call is skipped on repeat -- confirm
 * the guard.
 */
571 void instr_bytecodify_(struct label_list_
*ll
, struct instruction_
*i
, unsigned int allow_short_labels
) {
572 unsigned int nwu
= 0; /* number of words used */
573 unsigned int incomplete
= 0;
575 struct operand_
*o
= i
->operands
;
577 DEBUG_NOTIFY("TRACE: codifying %s%s'%s'...", i
->label
? i
->label
: "", i
->label
? ":" : "", i
->opcode
);
580 /* already codified */
584 /* special case DAT */
/*
 * NOTE(review): only the first three characters are compared, so any opcode
 * that merely starts with DAT takes this branch. buf_tokenize_ can also leave
 * opcode NULL; whether that is guarded before this call is not visible.
 */
585 if (strncasecmp(i
->opcode
, "DAT", 3) == 0) {
586 /* just dump operands into words, I guess */
587 fprintf(stderr
, "FIXME unhandled raw data\n");
588 /* count total length of data.. */
589 /* realloc instruction */
594 /* start with opcode bits */
595 bits
= opcode_bits_(i
->opcode
);
597 fprintf(stderr
, "unrecognized instruction '%s'\n", i
->opcode
);
600 i
->instr_words
[0] |= 0x0f & bits
;
602 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
603 if ((bits
& 0x0f) == 0) {
604 bits
= nbi_opcode_bits_(i
->opcode
);
606 fprintf(stderr
, "internal error: missing instruction in nbi opcode table\n");
611 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
614 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ 1, &nwu
, allow_short_labels
);
616 DEBUG_NOTIFY("TRACE: unresolved label\n");
617 /* keep going, but don't finalize until we can calculate label address */
623 i
->instr_words
[0] |= (bits
& 0x3f) << 4;
626 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
/* The extra word for this operand is written after any word used by the previous operand. */
630 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ nwu
+ 1, &nwu
, allow_short_labels
);
632 DEBUG_NOTIFY("TRACE: unresolved label\n");
633 /* keep going, but don't finalize until we can calculate label address */
638 i
->instr_words
[0] |= (bits
& 0x3f) << 10;
640 /* counting labels as words, we now know the maximum instruction length */
641 /* if label is < 0x20, it can take up less space */
645 DEBUG_NOTIFY("instruction words: [%u]", i
->length
);
/* bits is reused here as a loop index over word 0 plus the nwu extra words. */
646 for (bits
= 0; bits
<= (int)nwu
; bits
++)
647 DEBUG_NOTIFY(" 0x%04x", i
->instr_words
[bits
]);
650 DEBUG_NOTIFY(" (preliminary)");
658 /* thish should grow buffer to fit huge linesh, but I jusht don't care right now, hic */
/*
 * Read f line by line into a fixed buffer. Each line is tokenized with
 * buf_tokenize_ and appended to the instruction list. Labels are checked for
 * duplicates, added to the label list, and followed by a label address
 * recalculation. Each instruction is then passed to instr_bytecodify_.
 *
 * il, ll: addresses of the list pointers, passed on to the insert functions.
 * allow_short_labels: passed through to instr_bytecodify_.
 *
 * Errors are reported on stderr. The buffer declaration, the conditions
 * around the label handling and the return values are not visible in this
 * view.
 */
660 int parse_stream_(FILE *f
, struct instruction_list_
**il
, struct label_list_
**ll
, unsigned int allow_short_labels
) {
661 struct instruction_
*instr
, **instr_list_entry
;
/*
 * NOTE(review): fgets stores at most one byte less than the given size and
 * then writes a terminating NUL, so the last byte of buf is NUL after every
 * successful call and the overflow test below can never fire. The usual
 * form of this sentinel check watches the second-to-last byte instead.
 */
664 buf
[sizeof buf
- 1] = '\0';
666 while (fgets(buf
, sizeof buf
, f
)) {
667 if (buf
[sizeof buf
- 1] != '\0') {
668 fprintf(stderr
, "input buffer exhausted\n");
672 if (buf_tokenize_(buf
, &instr
)) {
673 fprintf(stderr
, "trouble tokenizing input\n");
678 /* add to list of instructions */
679 if (instr_list_insert(il
, instr
)) {
680 fprintf(stderr
, "could not populate instruction list\n");
/*
 * Address of the slot just filled. It is later stored by label_list_insert.
 * NOTE(review): if the instr array lives inside the block that
 * instr_list_insert reallocs, a later growth can move it and invalidate
 * slot addresses saved earlier -- verify against the struct definition.
 */
682 instr_list_entry
= (*il
)->instr
+ (*il
)->entries
- 1;
683 DEBUG_NOTIFY("TRACE: verify %s == %s\n", (*instr_list_entry
)->opcode
, instr
->opcode
);
/* The label may be NULL for unlabelled lines; presumably a guard precedes this call -- not visible here. */
685 if (label_list_find_instr(*ll
, instr
->label
)) {
686 fprintf(stderr
, "duplicate label\n");
689 if (label_list_insert(ll
, instr_list_entry
)) {
690 fprintf(stderr
, "could not populate label list\n");
692 label_addr_calculate_(*il
, *ll
);
695 instr_bytecodify_(*ll
, instr
, allow_short_labels
);
699 fprintf(stderr
, "%s():%s\n", "fgets", strerror(errno
));
703 fprintf(stderr
, "parsing aborted\n");
/*
 * Final pass after parsing: run instr_bytecodify_ once more over every
 * instruction, then print a table of labels through VERBOSE_NOTIFY and
 * instruction_print_. A label whose ready flag is clear is marked with an
 * asterisk, and a message on stderr reports that some labels could not be
 * resolved.
 *
 * The bookkeeping for unresolved labels and the return values are not visible
 * in this view; main treats a nonzero result as errors that prevent assembly.
 */
711 int assemble_check_(struct instruction_list_
*il
, struct label_list_
*ll
, unsigned int allow_short_labels
) {
715 DEBUG_NOTIFY(" final pass of codifier...\n");
716 for (x
= 0; x
< il
->entries
; x
++) {
717 instr_bytecodify_(ll
, il
->instr
[x
], allow_short_labels
);
720 VERBOSE_NOTIFY("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
721 for (x
= 0; x
< ll
->entries
; x
++) {
722 if (! ll
->label
[x
].ready
)
724 VERBOSE_NOTIFY("%3s0x%04x %-32s ",
725 ll
->label
[x
].ready
? "" : "*",
/* instruction_print_ writes with printf directly, so this line prints regardless of verbose_ unless guarded -- the guard is not visible here. */
729 instruction_print_(*(ll
->label
[x
].instr
), 0);
734 VERBOSE_NOTIFY("\n");
737 fprintf(stderr
, "some labels could not be resolved\n");
/*
 * Write the assembled words of every instruction in il to filename.
 *
 * For each instruction the visible code prints a listing line to stdout (the
 * instruction text padded to 44 columns, a semicolon, then each word in hex)
 * and writes length words from instr_words with fwrite. A final summary goes
 * to stderr.
 *
 * fopen and fwrite failures are reported on stderr. The conditions selecting
 * listing versus file output, the closing of the file and the return values
 * are not visible in this view; main treats a nonzero result as failure.
 */
743 int output_(struct instruction_list_
*il
, const char *filename
) {
745 struct instruction_
*instr
;
746 size_t i
, r
, total_words
= 0;
/*
 * NOTE(review): the file receives raw binary words but is opened with mode w.
 * On platforms that distinguish text streams this can alter the bytes; wb
 * would be safer. Words are also written in host byte order.
 */
750 of
= fopen(filename
, "w");
752 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
757 for (i
= 0; i
< il
->entries
; i
++) {
758 instr
= il
->instr
[i
];
762 s
= instruction_print_(instr
, 1);
/* Pad the listing to column 44; the width is clamped to zero for longer lines. */
763 printf("%*s;", (44 - s
) > 0 ? (44 - s
) : 0, "");
764 for (x
= 0; x
< instr
->length
; x
++) {
765 printf(" %04x", instr
->instr_words
[x
]);
771 r
= fwrite(instr
->instr_words
, sizeof(DCPU16_WORD
), instr
->length
, of
);
772 if (r
< instr
->length
) {
773 fprintf(stderr
, "%s():%s\n", "fwrite", strerror(errno
));
777 total_words
+= instr
->length
;
780 fprintf(stderr
, "wrote 0x%04zx instructions as 0x%04zx words\n",
/* Program-wide instruction list; created in main with instr_list_new. */
786 static struct instruction_list_
*il_
;
/* Program-wide label list; created in main with label_list_new. */
787 static struct label_list_
*ll_
;
789 int main(int argc
, char *argv
[]) {
790 const char *out_filename
= NULL
;
791 unsigned int allow_short_labels
= 0;
794 while ( (c
= getopt(argc
, argv
, "dhsvo:")) != EOF
) {
801 allow_short_labels
++;
806 fprintf(stderr
, "Sorry, I can only write one file at a time.\n");
809 out_filename
= optarg
;
829 if (out_filename
== NULL
)
830 out_filename
= out_filename_default_
;
833 il_
= instr_list_new();
834 ll_
= label_list_new();
836 /* if filenames were specified, parse them instead of stdin */
839 char *filename
= *argv
;
840 FILE *f
= fopen(filename
, "r");
845 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
849 VERBOSE_NOTIFY("assembling '%s'...\n", filename
);
850 parse_stream_(f
, &il_
, &ll_
, allow_short_labels
);
855 VERBOSE_NOTIFY("assembling '%s'...\n", "stdin");
856 parse_stream_(stdin
, &il_
, &ll_
, allow_short_labels
);
859 if (assemble_check_(il_
, ll_
, allow_short_labels
)) {
860 fprintf(stderr
, "errors prevented assembly\n");
864 if (output_(il_
, out_filename
)) {
865 fprintf(stderr
, "failed to create output\n");