c89d1030816bdec4f9695fefddae34d01d87b11c
12 * quick and dirty assembler for dcpu16
15 * needs ability to specify location for code or data
18 static const char * const src_id_
= "$Id$";
20 const char const out_filename_default_
[] = "a.out";
21 unsigned int verbose_
= 0;
22 unsigned int dryrun_
= 0;
25 #define DEBUG_PRINTF(...) do { if (verbose_ > 2) printf(__VA_ARGS__); } while (0)
26 #define VERBOSE_PRINTF(...) do { if (verbose_) printf(__VA_ARGS__); } while (0)
30 void usage_(char *prog
, unsigned int full
) {
31 FILE *f
= full
? stdout
: stderr
;
32 char *x
= strrchr(prog
, '/');
38 fprintf(f
, "%s -- \n\n",
41 fprintf(f
, "Usage: %s [-h] [-v] [-s] [-o file] file [file [...]]\n",
45 fprintf(f
, "\nOptions:\n"
46 "\t-h -- this screen\n"
47 "\t-o <file> -- output to <file> [default: %s]\n"
48 "\t-s -- allow short labels in instruction words\n"
49 "\t-d -- dry run, print results, do not write to file\n"
50 "\t-v -- verbose output\n",
51 out_filename_default_
);
53 fprintf(f
, "\n%78s\n",
59 /* maintain an array of the instructions we have parsed */
61 struct instruction_list_
*instr_list_new(void) {
62 size_t init_size
= 1024;
63 struct instruction_list_
*il
= malloc(IL_SIZE(init_size
));
65 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
68 il
->allocated
= init_size
;
74 int instr_list_insert(struct instruction_list_
**il
, struct instruction_
*i
) {
75 /* make room make room */
76 if ((*il
)->entries
- 1 == (*il
)->allocated
) {
77 size_t new_allocated
= (*il
)->allocated
+ 1024;
78 void *tmp_ptr
= realloc(*il
, IL_SIZE(new_allocated
));
79 if (tmp_ptr
== NULL
) {
80 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
84 (*il
)->allocated
= new_allocated
;
87 (*il
)->instr
[(*il
)->entries
] = i
;
92 /* also maintain a list of the labels we've seen, indexed back to their instructions. */
93 /* FIXME: ugh, this could all stand to be rewritten cleaner */
94 /* these lists could be rearranged to be a lot easier to wrangle and/or maybe use common interfaces */
95 /* they were thrown together on the fly */
97 struct label_list_
*label_list_new(void) {
98 size_t init_size
= 256;
99 struct label_list_
*ll
= malloc(LL_SIZE(init_size
));
101 fprintf(stderr
, "%s():%s\n", "malloc", strerror(errno
));
104 ll
->allocated
= init_size
;
109 /* instr here is index into instruction list */
111 int label_list_insert(struct label_list_
**ll
, struct instruction_
**instr
) {
112 if ((*ll
)->entries
- 1 == (*ll
)->allocated
) {
113 size_t new_allocated
= (*ll
)->allocated
+ 256;
114 void *tmp_ptr
= realloc(*ll
, IL_SIZE(new_allocated
));
115 if (tmp_ptr
== NULL
) {
116 fprintf(stderr
, "%s():%s\n", "realloc", strerror(errno
));
120 (*ll
)->allocated
= new_allocated
;
123 DEBUG_PRINTF("TRACE: adding label '%s'\n", (*instr
)->label
);
125 (*ll
)->label
[(*ll
)->entries
].label
= (*instr
)->label
;
126 (*ll
)->label
[(*ll
)->entries
].instr
= instr
;
131 /* locate the index of a labelled instruction within the instruction list */
133 struct instruction_
**label_list_find_instr(struct label_list_
*ll
, char *label
) {
136 for (x
= 0; x
< ll
->entries
; x
++) {
137 if (strcmp(ll
->label
[x
].label
, label
) == 0)
138 return ll
->label
[x
].instr
;
143 /* look up the address of a calculated address */
145 int label_list_find_addr(struct label_list_
*ll
, char *label
, DCPU16_WORD
*addr
) {
148 for (x
= 0; x
< ll
->entries
; x
++) {
149 if (strcmp(ll
->label
[x
].label
, label
) == 0) {
150 if (ll
->label
[x
].ready
== 1) {
151 *addr
= ll
->label
[x
].addr
;
159 /* attempt to determine the addresses of labels */
161 void label_addr_calculate_(struct instruction_list_
*il
, struct label_list_
*ll
) {
164 /* walk through labels */
165 for (i
= 0; i
< ll
->entries
; i
++) {
166 struct instruction_
**instr
;
167 unsigned int word_count
= 0;
169 if (ll
->label
[i
].ready
)
173 * walk backwards through the list of instructions
174 * until we get to the start or a known prior label address
177 for (instr
= ll
->label
[i
].instr
; instr
>= il
->instr
; instr
--) {
179 word_count
+= (*instr
)->length
;
182 && strcmp((*instr
)->label
, ll
->label
[i
].label
)) {
185 if (label_list_find_addr(ll
, (*instr
)->label
, &addr
)) {
186 fprintf(stderr
, "internal error: incomplete prior address for '%s' while calculating '%s'\n",
195 ll
->label
[i
].addr
= word_count
;
196 ll
->label
[i
].ready
= 1;
197 DEBUG_PRINTF("label '%s' has addr of 0x%04x\n", ll
->label
[i
].label
, word_count
);
202 void instr_free_(struct instruction_
*i
) {
207 while (i
->operands
) {
208 struct operand_
*o
= i
->operands
;
210 i
->operands
= o
->next
;
217 /* generate the nibble for a given basic opcode */
219 int opcode_bits_(char *opcode
) {
223 } opcodes_lower_nibble
[] = {
225 /* { "future nbi instruction", 0x00 }, */
244 for (o
= opcodes_lower_nibble
; o
->op
[0]; o
++) {
245 if (strcasecmp(o
->op
, opcode
) == 0)
249 if (o
->op
[0] == '\0') {
250 fprintf(stderr
, "unknown instruction '%s'\n", opcode
);
257 /* generate the six bits for a given nbi opcode (aka first operand to opcode 0x00) */
259 int nbi_opcode_bits_(char *nbi_opcode
) {
263 } nbi_opcodes_bits
[] = {
264 { " ", 0x00 }, /* reserved for future */
269 for (o
= nbi_opcodes_bits
; o
->op
[0]; o
++) {
270 if (strcasecmp(o
->op
, nbi_opcode
) == 0)
274 if (o
->op
[0] == '\0') {
275 fprintf(stderr
, "unknown nbi instruction '%s'\n", o
->op
);
282 /* convert register character like 'x' to value like 0x03 */
284 unsigned int register_enumerate_(char r
) {
285 const char regs
[] = "AaBbCcXxYyZzIiJj";
286 const char *x
= strchr(regs
, r
);
291 fprintf(stderr
, "internal error, unknown register character 0x%02x\n", r
);
295 /* removes all occurences of chars from buf */
297 void buf_strip_chars_(char *buf
, char *chars
) {
300 for (s
= d
= buf
; *s
; s
++, d
++) {
301 while (*s
&& strchr(chars
, *s
)) {
312 * generate the six bits for a given operand string
313 * returns -1 if it could not parse the operand
314 * returns -2 if it could not parse the operand due to an unresolved label
315 * notes: nextword may be overwritten even if it's not used in final instruction
318 int value_bits_(struct label_list_
*ll
, char *operand_orig
, DCPU16_WORD
*nextword
, unsigned int *nextwordused
, unsigned int allow_short_labels
) {
319 static char *operand
= NULL
;
320 static size_t operand_sz
= 0;
326 Our operand working buffer shouldn't ever need to be too big,
327 but DAT might blow that assumption.
329 if (operand_sz
<= strlen(operand_orig
)) {
331 size_t new_sz
= strlen(operand_orig
);
337 DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__
, new_sz
);
338 tmp_ptr
= realloc(operand
, new_sz
);
339 if (tmp_ptr
== NULL
) {
340 fprintf(stderr
, "%s(%zu):%s\n", "realloc", new_sz
, strerror(errno
));
347 o
= strcpy(operand
, operand_orig
);
349 DEBUG_PRINTF("%s: operand '%s' ", __func__
, operand
); /* completed later */
351 /* this is a very stupid parser */
353 /* first, let's trim all whitespace out of string at once to make parsing easier */
354 buf_strip_chars_(operand
, " \t\n");
356 /* single character might match a register */
357 if (strlen(operand
) == 1
358 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
359 DEBUG_PRINTF("is register %c\n", *operand
);
360 return register_enumerate_(*operand
);
364 if (strcasecmp(operand
, "POP") == 0) {
365 DEBUG_PRINTF("is POP\n");
368 if (strcasecmp(operand
, "PUSH") == 0) {
369 DEBUG_PRINTF("is PUSH\n");
372 if (strcasecmp(operand
, "PEEK") == 0) {
373 DEBUG_PRINTF("is PEEK\n");
376 if (strcasecmp(operand
, "SP") == 0) {
377 DEBUG_PRINTF("is register SP\n");
380 if (strcasecmp(operand
, "PC") == 0) {
381 DEBUG_PRINTF("is register PC\n");
384 if (strcasecmp(operand
, "O") == 0) {
385 DEBUG_PRINTF("is register O\n");
389 /* is the operand [bracketed]? */
390 if (operand
[0] == '[' && operand
[strlen(operand
) - 1] == ']') {
391 /* eat the brackets */
392 operand
[strlen(operand
) - 1] = '\0';
395 /* is it [register]? */
396 if (strlen(operand
) == 1
397 && strchr("AaBbCcXxYyZzIiJj", *operand
)) {
398 DEBUG_PRINTF("is dereferenced register %c\n", *operand
);
399 return 0x08 | register_enumerate_(*operand
);
402 /* is it [register+something]? */
403 if ( (ep
= strchr(operand
, '+')) ) {
411 /* figure out which one is which */
413 && strchr("AaBbCcXxYyZzIiJj", *ep
)) {
416 } else if (strlen(operand
) == 1
417 && strchr("AaBbCcXxYyZzIiJj", *operand
) ) {
421 DEBUG_PRINTF("is unparsable\n");
422 fprintf(stderr
, "couldn't parse operand '%s'\n", operand_orig
);
426 /* check if something is understandable as a value */
428 l
= strtoul(constant
, &ep
, 0);
430 && (*constant
&& (*ep
== '\0')) ) {
431 /* string conversion went without issue */
432 /* validate it will fit in a word */
434 DEBUG_PRINTF("is out of range\n");
435 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
440 *nextword
= l
& 0xffff;
442 DEBUG_PRINTF("is a dereferenced register (%c) + constant (%hu)\n", *reg
, *nextword
);
443 return 0x10 | register_enumerate_(*reg
);
445 DEBUG_PRINTF("is out of range\n");
446 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
450 /* what? still here? assume it's a label, I guess */
451 /* try to populate nextword with label address */
452 if (label_list_find_addr(ll
, operand
, nextword
)) {
453 DEBUG_PRINTF("(deferred label resolution)\n");
457 DEBUG_PRINTF("is a dereferenced register (%c) + label\n", *reg
);
459 return 0x10 | register_enumerate_(*reg
);
462 /* it must just be a dereferenced literal then */
465 l
= strtoul(operand
, &ep
, 0);
467 && (*operand
&& (*ep
== '\0')) ) {
468 /* string conversion went without issue */
469 /* validate it will fit in a word */
471 DEBUG_PRINTF("is out of range\n");
472 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
476 DEBUG_PRINTF("is a dereferenced literal value (%hu)\n", *nextword
);
477 *nextword
= l
& 0xffff;
481 DEBUG_PRINTF("is out of range\n");
482 fprintf(stderr
, "trouble with operand '%s': %s\n", operand_orig
, strerror(errno
));
485 /* not a number? try a label */
486 if (label_list_find_addr(ll
, operand
, nextword
)) {
487 DEBUG_PRINTF("(deferred label resolution)\n");
491 DEBUG_PRINTF("is a dereferenced label\n");
496 /* left with a literal or a label, then */
499 l
= strtoul(operand
, &ep
, 0);
501 || (*operand
&& (*ep
== '\0')) ) {
503 DEBUG_PRINTF("is out of range\n");
504 fprintf(stderr
, "constant invalid in operand '%s'\n", operand_orig
);
508 DEBUG_PRINTF("is literal value (%lu)\n", l
);
513 *nextword
= l
& 0xffff;
518 /* try to populate nextword with label address */
519 if (label_list_find_addr(ll
, operand
, nextword
)) {
520 DEBUG_PRINTF("(deferred label resolution)\n");
521 /* assume non-small literal value */
526 DEBUG_PRINTF("is label '%s' (0x%02hx)\n", operand
, *nextword
);
527 if (*nextword
< 0x20 && allow_short_labels
) {
528 DEBUG_PRINTF("small value label win\n");
529 return (0x20 + *nextword
) & 0x3f;
537 int instruction_print_(struct instruction_
*i
, unsigned int with_label
) {
542 r
= printf("%-16s %3s", i
->label
? i
->label
: "", i
->opcode
);
544 r
= printf("%3s", i
->opcode
);
546 for (o
= i
->operands
; o
; o
= o
->next
)
547 r
+= printf(" %s%s", o
->operand
, o
->next
? "," : "");
552 /* parse an instruction out of buf, create new instruction struct if seemingly valid */
553 /* does not actually check if instruction is valid yet */
554 /* buf must be 0-terminated */
556 int buf_tokenize_(char *buf
, struct instruction_
**next_instr
) {
557 const char const *sep
= " \t\n";
558 struct instruction_
*instr
= NULL
;
568 assert(next_instr
!= NULL
);
573 if ((x
= strchr(buf
, ';')) != NULL
)
575 /* kill leading whitespace */
576 buf
+= strspn(buf
, " \t\n");
577 /* kill trailing whitespace */
579 x
= buf
+ strlen(buf
);
580 while (strchr(" \t\n", *x
)) {
586 if ((x
= strrchr(buf
, '\n')) != NULL
)
589 /* determine if first token is label, opcode, or we just have a blank line to ignore */
590 x
= strtok_r(buf
, sep
, &st
);
592 /* empty line? nothing to do here. */
597 /* labels end with :, otherwise its an opcode */
598 y
= x
+ strlen(x
) - 1;
602 opcode
= strtok_r(NULL
, sep
, &st
);
604 #else /* OTHER_LABELS */
605 /* labels.. begin? with ':' ? okay, I guess. Whatever. */
606 /* otherwise, it's an opcode */
609 opcode
= strtok_r(NULL
, sep
, &st
);
614 #endif /* OTHER_LABELS */
620 /* extra room for assembled words */
621 instr
= calloc(1, 3 + sizeof *instr
);
623 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
627 instr
->label
= label
? strdup(label
) : NULL
;
628 instr
->opcode
= opcode
? strdup(opcode
) : NULL
;
631 struct operand_
**o_next
= &instr
->operands
;
633 for (x
= strtok_r(operand
, ",", &st
);
635 x
= strtok_r(NULL
, ",", &st
) ) {
636 *o_next
= malloc(3 + sizeof **o_next
); /* FIXME: handle this on the fly later */
638 if (*o_next
== NULL
) {
639 fprintf(stderr
, "%s():%s\n", "calloc", strerror(errno
));
645 x
+= strspn(x
, " \t\n");
647 y
= x
+ strlen(x
) - 1;
648 while (strchr(" \t\n", *y
)) {
654 (*o_next
)->operand
= strdup(x
);
655 (*o_next
)->next
= NULL
;
656 o_next
= &((*o_next
)->next
);
665 /* try to generate bytecode for an instruction */
667 int instr_assemble_(struct label_list_
*ll
, struct instruction_
*i
, unsigned int allow_short_labels
) {
668 unsigned int nwu
= 0; /* number of words used */
669 unsigned int incomplete
= 0;
671 struct operand_
*o
= i
->operands
;
674 printf("%s: assembling ", __func__
);
675 instruction_print_(i
,1);
680 /* already assembled, nothing to do */
684 /* special case DAT */
685 if (strncasecmp(i
->opcode
, "DAT", 3) == 0) {
686 /* just dump operands into words, I guess */
687 fprintf(stderr
, "FIXME unhandled raw data\n");
688 /* count total length of data.. */
689 /* realloc instruction */
694 /* start with opcode bits */
695 bits
= opcode_bits_(i
->opcode
);
697 fprintf(stderr
, "unrecognized instruction '%s%s", i
->opcode
, i
->operands
? " " : "");
698 for (o
= i
->operands
; o
; o
= o
->next
)
699 fprintf(stderr
, " %s%s", o
->operand
, o
->next
? "," : "");
700 fprintf(stderr
, "'\n");
703 i
->instr_words
[0] |= 0x0f & bits
;
705 /* in rendered bytecode, all instructions have two operands; nbi instructions take 'first operand' bits. */
706 if ((bits
& 0x0f) == 0) {
707 bits
= nbi_opcode_bits_(i
->opcode
);
709 fprintf(stderr
, "INTERNAL ERROR: missing instruction in nbi opcode table\n");
714 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
717 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ 1, &nwu
, allow_short_labels
);
719 fprintf(stderr
, "couldn't assemble instruction\n");
721 } else if (bits
== -2) {
722 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
723 /* keep going, but don't finalize until we can calculate label address */
729 i
->instr_words
[0] |= (bits
& 0x3f) << 4;
732 fprintf(stderr
, "'%s' requires more operands\n", i
->opcode
);
736 bits
= value_bits_(ll
, o
->operand
, i
->instr_words
+ nwu
+ 1, &nwu
, allow_short_labels
);
738 fprintf(stderr
, "couldn't assemble instruction\n");
740 } else if (bits
== -2) {
741 DEBUG_PRINTF("%s: assembly deferred: unresolved label\n", __func__
);
742 /* keep going, but don't finalize until we can calculate label address */
747 i
->instr_words
[0] |= (bits
& 0x3f) << 10;
750 fprintf(stderr
, "too many operands\n");
754 /* counting labels as words, we now know at least the maximum instruction length */
758 DEBUG_PRINTF("instruction words: [%u]", i
->length
);
759 for (bits
= 0; bits
<= (int)nwu
; bits
++)
760 DEBUG_PRINTF(" %04x", i
->instr_words
[bits
]);
763 DEBUG_PRINTF(" (preliminary)");
774 * read lines from stream f
775 * break each line into parts, populate parts into structures
778 int parse_stream_(FILE *f
, const char *src
, struct instruction_list_
**il
, struct label_list_
**ll
, unsigned int allow_short_labels
) {
779 struct instruction_
*instr
, **instr_list_entry
;
780 unsigned int line
= 0;
784 buf
[sizeof buf
- 1] = '\0';
786 while (fgets(buf
, sizeof buf
, f
)) {
789 if (buf
[sizeof buf
- 1] != '\0') {
790 fprintf(stderr
, "%s:%u:%s", src
, line
, "input line too long\n");
795 if (buf_tokenize_(buf
, &instr
)) {
796 fprintf(stderr
, "%s:%u:%s", src
, line
, "trouble tokenizing input\n");
802 /* add to list of instructions */
803 if (instr_list_insert(il
, instr
)) {
804 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate instruction list\n");
806 instr_list_entry
= (*il
)->instr
+ (*il
)->entries
- 1;
809 if (label_list_find_instr(*ll
, instr
->label
)) {
810 fprintf(stderr
, "%s:%u:%s", src
, line
, "duplicate label\n");
813 if (label_list_insert(ll
, instr_list_entry
)) {
814 fprintf(stderr
, "%s:%u:%s", src
, line
, "could not populate label list\n");
816 label_addr_calculate_(*il
, *ll
);
819 instr_assemble_(*ll
, instr
, allow_short_labels
);
823 fprintf(stderr
, "%s():%s\n", "fgets", strerror(errno
));
827 fprintf(stderr
, "parsing aborted\n");
835 * make a full pass over instruction list to resolve labels
838 int assemble_check_(struct instruction_list_
*il
, struct label_list_
*ll
, unsigned int allow_short_labels
) {
842 DEBUG_PRINTF(" final pass of assembler...\n");
843 for (x
= 0; x
< il
->entries
; x
++) {
844 retval
|= instr_assemble_(ll
, il
->instr
[x
], allow_short_labels
);
846 fprintf(stderr
, "instruction failed to assemble\n");
850 VERBOSE_PRINTF("%3s %6s %-32s %-4s\n", "", "_addr_", "_label_", "_instruction_");
851 for (x
= 0; x
< ll
->entries
; x
++) {
852 if (! ll
->label
[x
].ready
)
855 printf("%3s0x%04x %-32s ",
856 ll
->label
[x
].ready
? "" : "*",
859 instruction_print_(*(ll
->label
[x
].instr
), 0);
864 VERBOSE_PRINTF("\n");
867 fprintf(stderr
, "some labels could not be resolved\n");
873 int output_(struct instruction_list_
*il
, const char *filename
) {
875 struct instruction_
*instr
;
876 size_t i
, r
, total_words
= 0;
880 of
= fopen(filename
, "w");
882 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
887 for (i
= 0; i
< il
->entries
; i
++) {
888 instr
= il
->instr
[i
];
892 s
= instruction_print_(instr
, 1);
893 printf("%*s;", (44 - s
) > 0 ? (44 - s
) : 0, "");
894 for (x
= 0; x
< instr
->length
; x
++) {
895 printf(" %04x", instr
->instr_words
[x
]);
901 r
= fwrite(instr
->instr_words
, sizeof(DCPU16_WORD
), instr
->length
, of
);
902 if (r
< instr
->length
) {
903 fprintf(stderr
, "%s():%s\n", "fwrite", strerror(errno
));
907 total_words
+= instr
->length
;
910 fprintf(stderr
, "%s 0x%04zx instructions as 0x%04zx words\n",
911 dryrun_
? "assembled" : "wrote",
918 static struct instruction_list_
*il_
;
919 static struct label_list_
*ll_
;
921 int main(int argc
, char *argv
[]) {
922 const char *out_filename
= NULL
;
923 unsigned int allow_short_labels
= 0;
926 while ( (c
= getopt(argc
, argv
, "dhsvo:")) != EOF
) {
933 allow_short_labels
++;
938 fprintf(stderr
, "Sorry, I can only write one file at a time.\n");
941 out_filename
= optarg
;
961 if (out_filename
== NULL
)
962 out_filename
= out_filename_default_
;
965 il_
= instr_list_new();
966 ll_
= label_list_new();
968 /* if filenames were specified, parse them instead of stdin */
971 char *filename
= *argv
;
972 FILE *f
= fopen(filename
, "r");
977 fprintf(stderr
, "%s('%s'):%s\n", "fopen", filename
, strerror(errno
));
981 VERBOSE_PRINTF("assembling '%s'...\n", filename
);
982 parse_stream_(f
, filename
, &il_
, &ll_
, allow_short_labels
);
987 VERBOSE_PRINTF("assembling '%s'...\n", "stdin");
988 parse_stream_(stdin
, "-", &il_
, &ll_
, allow_short_labels
);
991 if (assemble_check_(il_
, ll_
, allow_short_labels
)) {
992 fprintf(stderr
, "errors prevented assembly\n");
996 if (output_(il_
, out_filename
)) {
997 fprintf(stderr
, "failed to create output\n");