+ DEBUG_PRINTF("%s: allocating buffer of size %zu\n", __func__, new_sz);
+ tmp_ptr = realloc(operand, new_sz);
+ if (tmp_ptr == NULL) {
+ fprintf(stderr, "%s(%zu):%s\n", "realloc", new_sz, strerror(errno));
+ return -1;
+ }
+ operand = tmp_ptr;
+ operand_sz = new_sz;
+ }
+
+ o = strcpy(operand, operand_orig);
+
+ DEBUG_PRINTF("%s: operand '%s' ", __func__, operand); /* completed later */
+
+ /* this is a very stupid parser */
+
+ /* first, let's trim all whitespace out of string at once to make parsing easier */
+ buf_strip_chars_(operand, " \t\n");
+
+ /* single character might match a register */
+ if (strlen(operand) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *operand)) {
+ DEBUG_PRINTFQ("is register %c\n", *operand);
+ return register_enumerate_(*operand);
+ }
+
+ /* easy matches */
+
+ /* push and pop now share the same operand value */
+ if (strcasecmp(operand, "POP") == 0
+ || strcasecmp(operand, "[SP++]") == 0) {
+ DEBUG_PRINTFQ("is POP\n");
+ return 0x18;
+ }
+ if (strcasecmp(operand, "PUSH") == 0
+ || strcasecmp(operand, "[--SP]") == 0) {
+ DEBUG_PRINTFQ("is PUSH\n");
+ return 0x18;
+ }
+
+ if (strcasecmp(operand, "PEEK") == 0
+ || strcasecmp(operand, "[SP]") == 0) {
+ DEBUG_PRINTFQ("is PEEK\n");
+ return 0x19;
+ }
+
+ /* this could be better, if we had a real token tree */
+ if (strncasecmp(operand, "PICK", 4) == 0) {
+ DEBUG_PRINTFQ("is PICK ");
+
+ errno = 0;
+ l = strtoul(operand + 4, &ep, 0);
+ if (errno == 0
+ && (*(operand + 4) && (*ep == '\0')) ) {
+ if (l > 0xffff) {
+ DEBUG_PRINTFQ("(out of range)\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+ } else if (errno == ERANGE) {
+ DEBUG_PRINTFQ("(out of range)\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+ DEBUG_PRINTFQ("0x%04x\n", *nextword);
+ return 0x1a;
+ }
+
+ if (strcasecmp(operand, "SP") == 0) {
+ DEBUG_PRINTFQ("is register SP\n");
+ return 0x1b;
+ }
+ if (strcasecmp(operand, "PC") == 0) {
+ DEBUG_PRINTFQ("is register PC\n");
+ return 0x1c;
+ }
+ if (strcasecmp(operand, "EX") == 0) {
+ DEBUG_PRINTFQ("is register EX\n");
+ return 0x1d;
+ }
+
+ /* is the operand [bracketed]? */
+ if (operand[0] == '[' && operand[strlen(operand) - 1] == ']') {
+ /* eat the brackets */
+ operand[strlen(operand) - 1] = '\0';
+ operand++;
+
+ /* is it [register]? */
+ if (strlen(operand) == 1
+ && strchr("AaBbCcXxYyZzIiJj", *operand)) {
+ DEBUG_PRINTFQ("is dereferenced register %c\n", *operand);
+ return 0x08 | register_enumerate_(*operand);
+ }
+
+ /* is it [register+something]? */
+ if ( (ep = strchr(operand, '+')) ) {
+ char *reg;
+ char *constant;
+
+ DEBUG_PRINTFQ("is multipart.. ");
+
+ /* eat the plus */
+ *ep = '\0';
+ ep++;
+
+ /* figure out which one is which */
+ if ((strlen(ep) == 1 && strchr("AaBbCcXxYyZzIiJj", *ep))
+ || (strlen(ep) == 2 && strcasecmp(ep, "SP")) ) {
+ reg = ep;
+ constant = operand;
+ } else if ((strlen(operand) == 1 && strchr("AaBbCcXxYyZzIiJj", *operand))
+ || (strlen(operand) == 2 && strcasecmp(operand, "SP")) ) {
+ reg = operand;
+ constant = ep;
+ } else {
+ DEBUG_PRINTFQ("is unparsable\n");
+ fprintf(stderr, "couldn't parse operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ /* check if something is understandable as a value */
+ errno = 0;
+ l = strtoul(constant, &ep, 0);
+ if (errno == 0
+ && (*constant && (*ep == '\0')) ) {
+ /* string conversion went without issue */
+ /* validate it will fit in a word */
+ if (l > 0xffff) {
+ DEBUG_PRINTFQ("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ /* seems fine */
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+
+ /* special case [SP+n]/PICK n */
+ if (strlen(reg) == 2) {
+ DEBUG_PRINTFQ("is PICK 0x%04x\n", *nextword);
+ return 0x1a;
+ }
+
+ DEBUG_PRINTFQ("is a dereferenced register (%c) + constant (%hu)\n", *reg, *nextword);
+ return 0x10 | register_enumerate_(*reg);
+ } else if (errno == ERANGE) {
+ fprintf(stderr, "%s('%s'):%s\n", "strtoul", constant, strerror(errno));
+ }
+
+ /* what? still here? assume it's a label, I guess */
+ /* try to populate nextword with label address */
+ if (label_addr_(labels, operand, nextword)) {
+ DEBUG_PRINTFQ("(deferred label resolution)\n");
+ *nextwordused += 1;
+ return -2;
+ }
+ DEBUG_PRINTFQ("is a dereferenced register (%c) + label\n", *reg);
+ *nextwordused += 1;
+ return 0x10 | register_enumerate_(*reg);
+ }
+
+ /* it must just be a dereferenced literal then */
+
+ errno = 0;
+ l = strtoul(operand, &ep, 0);
+ if (errno == 0
+ && (*operand && (*ep == '\0')) ) {
+ /* string conversion went without issue */
+ /* validate it will fit in a word */
+ if (l > 0xffff) {
+ DEBUG_PRINTFQ("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ DEBUG_PRINTFQ("is a dereferenced literal value (%hu)\n", *nextword);
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+ return 0x1e;
+ } else if (errno) {
+ /* if number wasn't parsable, just fall through and assume it's a label */
+ }
+
+ /* not a number? try a label */
+ if (label_addr_(labels, operand, nextword)) {
+ DEBUG_PRINTFQ("(deferred label resolution)\n");
+ *nextwordused += 1;
+ return -2;
+ }
+ DEBUG_PRINTFQ("is a dereferenced label\n");
+ *nextwordused += 1;
+ return 0x1e;
+ }
+
+ /* left with a literal or a label, then */
+
+ errno = 0;
+ l = strtoul(operand, &ep, 0);
+ if (errno == 0
+ || (*operand && (*ep == '\0')) ) {
+ if (l > 0xffff) {
+ DEBUG_PRINTFQ("is out of range\n");
+ fprintf(stderr, "constant invalid in operand '%s'\n", operand_orig);
+ return -1;
+ }
+
+ DEBUG_PRINTFQ("is literal value (%lu)\n", l);
+ if (l < 0x1f) {
+ return l + 0x21;
+ }
+ if (l == 0xffff) {
+ return 0x20;
+ }
+
+ *nextword = l & 0xffff;
+ *nextwordused += 1;
+ return 0x1f;
+ }
+
+ /* try to populate nextword with label address */
+ if (label_addr_(labels, operand, nextword)) {
+ DEBUG_PRINTFQ("(deferred label resolution)\n");
+ /* assume non-small literal value */
+ *nextwordused += 1;
+ return -2;
+ }
+
+ DEBUG_PRINTFQ("is label '%s' (0x%02hx)\n", operand, *nextword);
+ if (allow_short_labels
+ && (*nextword < 0x1f) ) {
+ DEBUG_PRINTF("small value label win\n");
+ return (0x21 + *nextword) & N_BIT_MASK(OPERAND_A_BITS);
+ }
+ if (allow_short_labels
+ && (*nextword == 0xffff) ) {
+ DEBUG_PRINTF("small value label win\n");
+ return 0x20;
+ }
+
+ *nextwordused += 1;
+ return 0x1f;
+}
+
+/* instruction_print_
+ *  Writes one instruction to stdout as assembly text: an optional label
+ *  column (left-justified, padded to at least 16 characters and followed by
+ *  a space), the opcode, and the comma-separated operand list.
+ *  When i->ready is set, " [" is printed, dcpu16_mnemonify_buf() is called
+ *  on i->instr_words (presumably emitting the encoded form -- confirm
+ *  against its definition), and "]" is printed.
+ *
+ *  i           instruction to print; a NULL label or opcode prints as ""
+ *  with_label  nonzero to emit the label column first
+ *
+ *  Returns the summed printf() results for the opcode and operands only;
+ *  the label column and the bracketed section are not counted.
+ */
+static inline
+int instruction_print_(struct instruction_ *i, unsigned int with_label) {
+    struct operand_ *op;
+    int printed;
+
+    /* label column: its character count is not part of the return value */
+    if (with_label) {
+        printf("%-16s ", i->label ? i->label : "");
+    }
+
+    printed = printf("%3s", i->opcode ? i->opcode : "");
+
+    /* every operand except the last is followed by a comma */
+    op = i->operands;
+    while (op) {
+        printed += printf(" %s%s", op->operand, op->next ? "," : "");
+        op = op->next;
+    }
+
+    if (i->ready) {
+        DCPU16_WORD words;
+
+        printf(" [");
+        words = dcpu16_mnemonify_buf(i->instr_words);
+        printf("]");
+
+        /* the value returned above is expected to equal the stored length */
+        if (i->length != words) {
+            DEBUG_PRINTF("!!internal inconsistency!! i->length:%u l:%hu should match\n", i->length, words);
+        }
+    }
+
+    return printed;
+}
+
+/* tokenize_line_
+ * Parses a zero-terminated line of input into a newly-allocated struct instruction_.
+ * [label] instruction [operand[,operand[,...]]]
+ * Does no validation of contents of any of these tokens, as of yet.
+ * Does not clean up after itself if a malloc fails.
+ */
+static
+int tokenize_line_(char *line, struct instruction_ **next_instr) {
+ const char const *whitespace = " \t\n";
+ const char const *quotes = "\"'`";
+ struct instruction_ *instr = NULL;
+ char *x, *st, *qt;
+ char *label, *opcode;
+ struct operand_ *operand_list = NULL;
+ struct operand_ **operand_tail = &operand_list;
+ size_t instr_words_needed = 0;
+
+ assert(line);
+ assert(next_instr);