Here's the C code for the disassembler, minus a few helper functions which are pretty obvious:
Code:
// One row of the mnemonic table.
//   mnem  - the mnemonic text; the array holds three characters plus
//           the terminating NUL
//   base  - the opcode with its bbb bits clear, i.e. the value the bbb
//           bits are added to; for an opcode that has no bbb field this
//           is simply the opcode itself
//   modes - every addressing mode the instruction accepts, OR-ed together
typedef struct {
    char          mnem[4];
    unsigned char base;
    unsigned int  modes;
} MNEMONIC;
// convert mode value (from instruction) to MODES
// The index is the three-bit bbb field that disassemble() extracts with
// (opcode & 0x1c) >> 2. Entries in index order:
//   0 indx   1 zpg   2 imm   3 mabs   4 indy   5 zpx   6 absy   7 absx
// disassemble() uses this mapping directly for the first instruction
// group (ora .. sbc) and overrides individual results for the others.
MODES val_to_mode [8] = {indx, zpg, imm, mabs, indy, zpx, absy, absx};
// Mnemonic table, grouped the way disassemble() decodes opcodes.
// disassemble() reaches each group through a fixed offset (0, 8, 12, 20,
// 28, 36, 52) plus bits taken from the opcode, so the order and number of
// entries must not change; the "???" rows are placeholders that keep the
// groups aligned.
MNEMONIC const mnem[MAX_OPCODES] = {
    // mnem  base  modes
    // 0: opcodes ending in binary 01, full set of addressing modes
    {"ora", 0x01, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"and", 0x21, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"eor", 0x41, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"adc", 0x61, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"sta", 0x81, mabs | zpg | absx | absy | indx | indy | zpx},
    {"lda", 0xa1, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"cmp", 0xc1, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    {"sbc", 0xe1, mabs | zpg | imm | absx | absy | indx | indy | zpx},
    // 8: 0x00, 0x20, 0x40, 0x60 - matched against the whole opcode
    {"brk", 0x00, imp},
    {"jsr", 0x20, mabs},
    {"rti", 0x40, imp},
    {"rts", 0x60, imp},
    // 12: opcodes ending in binary 00
    {"???", 0x00, mabs}, // dummy entry to simplify the decode
    {"bit", 0x20, mabs | zpg},
    {"jmp", 0x40, mabs},
    {"jmp", 0x60, ind}, // jmp ind
    {"sty", 0x80, mabs | zpg | zpx},
    {"ldy", 0xa0, mabs | zpg | imm | absx | zpx},
    {"cpy", 0xc0, mabs | zpg | imm},
    {"cpx", 0xe0, mabs | zpg | imm},
    // 20: opcodes ending in binary 10
    {"asl", 0x02, acc | mabs | zpg | absx | zpx},
    {"rol", 0x22, acc | mabs | zpg | absx | zpx},
    {"lsr", 0x42, acc | mabs | zpg | absx | zpx},
    {"ror", 0x62, acc | mabs | zpg | absx | zpx},
    {"stx", 0x82, mabs | zpg | zpy},
    {"ldx", 0xa2, mabs | zpg | imm | absy | zpy},
    {"dec", 0xc2, mabs | zpg | absx | zpx},
    {"inc", 0xe2, mabs | zpg | absx | zpx},
    // 28: conditional branches, 0x10 to 0xf0
    {"bpl", 0x10, rel},
    {"bmi", 0x30, rel},
    {"bvc", 0x50, rel},
    {"bvs", 0x70, rel},
    {"bcc", 0x90, rel},
    {"bcs", 0xb0, rel},
    {"bne", 0xd0, rel},
    {"beq", 0xf0, rel},
    // 36: single-byte opcodes 0x08 to 0xf8
    {"php", 0x08, imp},
    {"clc", 0x18, imp},
    {"plp", 0x28, imp},
    {"sec", 0x38, imp},
    {"pha", 0x48, imp},
    {"cli", 0x58, imp},
    {"pla", 0x68, imp},
    {"sei", 0x78, imp},
    {"dey", 0x88, imp},
    {"tya", 0x98, imp},
    {"tay", 0xa8, imp},
    {"clv", 0xb8, imp},
    {"iny", 0xc8, imp},
    {"cld", 0xd8, imp},
    {"inx", 0xe8, imp},
    {"sed", 0xf8, imp},
    // 52: single-byte opcodes 0x8a to 0xea
    {"txa", 0x8a, imp},
    {"txs", 0x9a, imp}, // 0x9a is TXS (X to stack pointer), not a second txa
    {"tax", 0xaa, imp},
    {"tsx", 0xba, imp},
    {"dex", 0xca, imp},
    {"???", 0x00, imp}, // dummy entry to simplify the decode
    {"nop", 0xea, imp},
};
////////////////////////////////////////////////////////////////////////
// disassemble() is defined further down the file; declare it before use
int disassemble (unsigned char opcode, MODES * mode);

int opcode_to_text (int ptr)
{
    // Disassemble the instruction at address ptr and print one line:
    // the address, the raw instruction bytes, the mnemonic and the operand.
    // Returns the length of the instruction in bytes (1, 2 or 3), so the
    // caller can add it to ptr to reach the next instruction.
    // An opcode that disassemble() cannot decode is printed as "???" and
    // counted as one byte.
    //
    // Bytes are read as unsigned char so values of 0x80 and above are not
    // sign-extended on the way to hex2out().
    const unsigned char *code = (const unsigned char *)ptr;
    MODES mode = imp; // defined value even if disassemble() writes nothing
    int index;
    int size;

    index = disassemble(code[0], &mode);

    // print address and opcode
    hex4out(ptr);
    putchar(' ');
    hex2out(code[0]);

    if (index < 0)
    {
        // disassemble() returns -1 without touching mode when nothing
        // matches; indexing mnem[] with that value would read outside
        // the table
        putchar(TAB);
        putchar(TAB);
        put_s("???");
        putchar(' ');
        crlf();
        return 1;
    }

    // instruction length follows from the addressing mode alone
    switch (mode)
    {
        case mabs:
        case absx:
        case absy:
        case ind:
            size = 3;
            break;
        case zpg:
        case imm:
        case indx:
        case indy:
        case zpx:
        case zpy:
        case rel:
            size = 2;
            break;
        default: // imp, acc, and any mode not listed above
            size = 1;
            break;
    }

    // print associated data if required, in memory order; the width is
    // taken from size rather than from the numeric order of the MODES
    // constants
    if (3 == size)
    {
        hex2out(code[1]);
        hex2out(code[2]);
    }
    else if (2 == size)
    {
        hex2out(code[1]);
    }
    else
    {
        putchar(TAB);
    }
    putchar(TAB);
    put_s(mnem[index].mnem);
    putchar(' ');

    // print the operand; 16-bit operands are stored low byte first, so
    // the high byte at code[2] is printed before the low byte at code[1]
    switch (mode)
    {
        case acc: // opc a
            putchar('a');
            break;
        case mabs: // opc $hhll
            putchar('$');
            hex2out(code[2]);
            hex2out(code[1]);
            break;
        case zpg: // opc $ll
            putchar('$');
            hex2out(code[1]);
            break;
        case imm: // opc #$ll
            putchar('#');
            putchar('$');
            hex2out(code[1]);
            break;
        case absx: // opc $hhll,x
            putchar('$');
            hex2out(code[2]);
            hex2out(code[1]);
            put_s(",x");
            break;
        case absy: // opc $hhll,y
            // printed byte by byte like absx instead of reading an int
            // from the odd address ptr + 1
            putchar('$');
            hex2out(code[2]);
            hex2out(code[1]);
            put_s(",y");
            break;
        case indx: // opc ($ll,x)
            put_s("($");
            hex2out(code[1]);
            put_s(",x)");
            break;
        case indy: // opc ($ll),y
            put_s("($");
            hex2out(code[1]);
            put_s("),y");
            break;
        case zpx: // opc $ll,x
            putchar('$');
            hex2out(code[1]);
            put_s(",x");
            break;
        case zpy: // opc $ll,y
            putchar('$');
            hex2out(code[1]);
            put_s(",y");
            break;
        case rel: // opc $bb (the raw branch offset byte)
            putchar('$');
            hex2out(code[1]);
            break;
        case ind: // opc ($hhll)
            put_s("($");
            hex2out(code[2]);
            hex2out(code[1]);
            putchar(')');
            break;
        default: // imp and anything else: no operand text
            break;
    }
    crlf();
    return size;
}
////////////////////////////////////////////////////////////////////////
int disassemble (unsigned char opcode, MODES * mode)
{
    // Decode an opcode into an index into the mnem[] table and write the
    // addressing mode found into *mode.
    // Returns the table index, or -1 if nothing is found; in that case
    // *mode is left untouched.
    // Does not check that the opcode represents a valid instruction
    // (eg sta immediate); it only reports the mode the bit pattern implies.
    // Assumes the basic 6502N instruction set.
    //
    // The opcode is treated as the bit pattern aaabbbcc: aaa selects the
    // entry within a group, bbb selects the addressing mode.
    unsigned char bbb = (opcode & 0x1c) >> 2;   // the three mode bits, moved down to 0-7
    unsigned char sliced = opcode & 0xe3;       // the opcode with bbb zeroed out
    unsigned char index = (opcode & 0xe0) >> 5; // aaa: which entry in a group?
    int ret = -1;                               // -1 if nothing found

    // first group of eight (table entries 0-7): ora .. sbc
    if (mnem[index].base == sliced)
    {
        ret = index;
        *mode = val_to_mode[bbb];
    }
    // get brk jsr rti rts (0x00, 0x20, 0x40, 0x60) out of the way to
    // avoid a clash with the groups below
    else if (0x00 == (opcode & 0x9f))
    {
        index = opcode >> 5;
        if (mnem[index + 8].base == opcode) // test against the whole opcode
        {
            ret = index + 8;
            *mode = (0x20 == opcode) ? mabs : imp;
        }
    }
    // sixteen single-byte opcodes 0x08 to 0xf8 (table entries 36-51),
    // no alternate addressing modes
    else if (0x08 == (opcode & 0x0f))
    {
        ret = (opcode >> 4) + 36;
        *mode = imp;
    }
    // single-byte opcodes 0x8a to 0xea (table entries 52-58, of which
    // entry 57 is the dummy for the unused 0xda)
    else if (0x8a == (opcode & 0x8f))
    {
        if (0xfa == opcode)
        {
            // 0xfa also passes the mask but would compute index 59, one
            // past the last listed entry; it is unassigned just like
            // 0xda, so report the same dummy entry
            ret = 52 + 5;
        }
        else
        {
            ret = ((opcode - 0x80) >> 4) + 52;
        }
        *mode = imp;
    }
    // eight branches 0x10 to 0xf0 (table entries 28-35),
    // no alternate addressing modes
    else if (0x10 == (opcode & 0x1f))
    {
        ret = (opcode >> 5) + 28;
        *mode = rel;
    }
    // group at table entries 12-19: (dummy) bit jmp jmp sty ldy cpy cpx
    else if (mnem[index + 12].base == sliced)
    {
        ret = index + 12;
        // 0x6c is the one indirect jmp; everything else follows bbb.
        // A single assignment: the old form assigned *mode twice in one
        // expression with no sequencing between the two writes.
        *mode = (0x6c == opcode) ? ind : val_to_mode[bbb];
        // this block is different: bbb == 0 means immediate here
        if (indx == *mode)
        {
            *mode = imm;
        }
    }
    // group at table entries 20-27: asl rol lsr ror stx ldx dec inc
    else if (mnem[index + 20].base == sliced)
    {
        ret = index + 20;
        *mode = val_to_mode[bbb];
        // bbb == 2 means accumulator in this group, not immediate
        if (imm == *mode)
        {
            *mode = acc;
        }
        // edge case: ldx 0xa2 (bbb == 0) is immediate instead of indx
        if (0xa2 == opcode)
        {
            *mode = imm;
        }
        // edge cases: ldx and stx index by y, so zpx becomes zpy ...
        if ((0xb6 == opcode) || (0x96 == opcode))
        {
            *mode = zpy;
        }
        // ... and ldx 0xbe is absolute,y rather than absolute,x
        if (0xbe == opcode)
        {
            *mode = absy;
        }
    }
    return ret;
}
////////////////////////////////////////////////////////////////////////
Neil