#!/usr/bin/perl -w
# ex: set ts=4:
# 65C02 disassembler
# (C) 2001-2003 Christopher A. Eslinger
# $Id: insds.pl,v 1.17 2003/04/08 06:30:48 yakko Exp $
# feel free to golf this down; I did this in a day, and it probably shows
# License: Perl Artistic

use strict;
use Getopt::Std;

# usage: insds [-b hexbase] [-s symbolfile] [-n] [-o offset] [filename]
my %opt;
getopts('b:s:o:n', \%opt);

my $file=(defined $ARGV[0]) ? $ARGV[0] : "&STDIN";

# list of mnemonics and amodes/insnlen as a byte
# what i really want is to implement this like Woz did,
# ie, port it from  6502 to Perl... but that's later.
# the address modes are:
# 0:no operand 1:absolute,X 2:absolute,Y 3:relative 4:zeropage absolute
# 5:zeropage,X 6:zeropage,Y 7:immediate 8:(indirect) 9:(indirect zp,X)
# 10:(indirect zp),Y 11:(indirect abs) 12:(indirect abs,X) 13:absolute
my @opcodes=qw(
	BRK 01 ORA 92 HCF 01 HCF 01 TSB 42 ORA 42 ASL 42 RMB0 42
	PHP 01 ORA 72 ASL 01 HCF 01 TSB d3 ORA d3 ASL d3 BBR0 32
	BPL 32 ORA a2 ORA 82 HCF 01 TRB 42 ORA 52 ASL 52 RMB1 42
	CLC 01 ORA 23 INC 01 HCF 01 TRB d3 ORA 13 ASL 13 BBR1 32
	JSR d3 AND 92 HCF 01 HCF 01 BIT 42 AND 42 ROL 42 RMB2 42
	PLP 01 AND 72 ROL 01 HCF 01 BIT d3 AND d3 ROL d3 BBR2 32
	BMI 32 AND a2 AND 82 HCF 01 BIT 52 AND 52 ROL 52 RMB3 42
	SEC 01 AND 23 DEC 01 HCF 01 BIT 13 AND 13 ROL 13 BBR3 32
	RTI 01 EOR 92 HCF 01 HCF 01 HCF 01 EOR 42 LSR 42 RMB4 42
	PHA 01 EOR 72 LSR 01 HCF 01 JMP d3 EOR d3 LSR d3 BBR4 32
	BVC 32 EOR a2 EOR 82 HCF 01 HCF 01 EOR 52 LSR 52 RMB5 42
	CLI 01 EOR 23 PHY 01 HCF 01 HCF 01 EOR 13 LSR 13 BBR5 32
	RTS 01 ADC 92 HCF 01 HCF 01 STZ 42 ADC 42 ROR 42 RMB6 42
	PLA 01 ADC 72 ROR 01 HCF 01 JMP b3 ADC d3 ROR d3 BBR6 32
	BVS 32 ADC a2 ADC 82 HCF 01 STZ 52 ADC 52 ROR 52 RMB7 42
	SEI 01 ADC 23 PLY 01 HCF 01 JMP c3 ADC 13 ROR 13 BBR7 32
	BRA 32 STA 92 HCF 01 HCF 01 STY 42 STA 42 STX 42 SMB0 42
	DEY 01 BIT 72 TXA 01 HCF 01 STY d3 STA d3 STX d3 BBS0 32
	BCC 32 STA a2 STA 82 HCF 01 STY 52 STA 52 STX 52 SMB1 42
	TYA 01 STA 23 TXS 01 HCF 01 STZ d3 STA 13 STZ 13 BBS1 32
	LDY 72 LDA 92 LDX 72 HCF 01 LDY 42 LDA 42 LDX 42 SMB2 42
	TAY 01 LDA 72 TAX 01 HCF 01 LDY d3 LDA d3 LDX d3 BBS2 32
	BCS 32 LDA a2 LDA 82 HCF 01 LDY 52 LDA 52 LDX 52 SMB3 42
	CLV 01 LDA 23 TSX 01 HCF 01 LDY 13 LDA 13 LDX 13 BBS3 32
	CPY 72 CMP 92 HCF 01 HCF 01 CPY 42 CMP 42 DEC 42 SMB4 42
	INY 01 CMP 72 DEX 01 WAI 01 CPY d3 CMP d3 DEC d3 BBS4 32
	BNE 32 CMP a2 CMP 82 HCF 01 HCF 01 CMP 52 DEC 52 SMB5 42
	CLD 01 CMP 23 PHX 01 STP 01 HCF 01 CMP 13 DEC 13 BBS5 32
	CPX 72 SBC 92 HCF 01 HCF 01 CPX 42 SBC 42 INC 42 SMB6 42
	INX 01 SBC 72 NOP 01 HCF 01 CPX d3 SBC d3 INC d3 BBS6 32
	BEQ 32 SBC a2 SBC 82 HCF 01 HCF 01 SBC 52 INC 52 SMB7 42
	SED 01 SBC 23 PLX 01 HCF 01 HCF 01 SBC 13 INC 13 BBS7 32
	HEX d1 DA d2
);

# hash to contain anon arrays of (amode, insnlen)
my %ops;

# address mode formatting for operands
# address modes are as above
my %am=(
	1 => \&am_abx,
	2 => \&am_aby,
	5 => \&am_abx,
	6 => \&am_aby,
	7 => \&am_imm,
	8 => \&am_ind,
	9 => \&am_idx,
	10 => \&am_idy,
	11 => \&am_ind,
	12 => \&am_idx
);

# $ops{$foo} ->[0]    [1]    [2]
# build the mnemonic/amode/insnlen hash, keys are the opcode number
foreach my $i (0..(scalar @opcodes / 2)-1) {

	# mnemonics every even element
	my $mnem=$opcodes[$i*2];

	# amode/insnlen are every other (odd) value
	my $alen=$opcodes[($i*2)+1];

	# mask off amode, shift it down
	my $addrmode=(hex($alen)&240)>>4;

	# the insnlen is the lower nibble
	my $insnlen=hex($alen)&15;

	# build the hash using anon lists
	$ops{$i}=[ $mnem, $addrmode, $insnlen ];
}

# global vars
my $symfile=(defined($opt{"s"})) ? $opt{"s"} : undef;  # symbol file
my $pc=(defined($opt{"b"})) ? hex($opt{"b"}) : 0;      # starting PC
my $opcode;					# opcode from file
my $count;					# bytes read from file
my $insnlen;				# length of instruction
my $chunk=undef;			# bytes read out of file go here
my $offset;					# operand length
my %sym;					# symbol hash if symfile specified
my $nomli=(defined($opt{"n"})) ? 1 : 0;		# no MLI expansion
my $mli=0;				# flag MLI calls
my $mlistart=0;				# flag MLI calls
my $mlicall=0;				# MLI call number
my %mli;				# MLI symbol hash
my $mlisyms=0;			# do MLI symbols
my $byteskip=(defined($opt{"o"})) ? $opt{"o"} : 0;	# offset into file
my $dummy;				# dummy var for byte offset

# read the symbol file (if any)
if(defined($symfile)) {
	open(SYMBOLS, "<$symfile") or die("open($symfile): $!\n");
	while(<SYMBOLS>) {
		chomp;
		# the format of the symbol file is "HEXADDR SYMBOL"
		my($saddr,$stext)=split;
		$sym{hex($saddr)}=$stext;
	}
	close(SYMBOLS);
}

# read MLI symbols unless MLI expandos are turned off
if($nomli==0) {
	if(open(MLI,"<MLI.SYM")) {
		while(<MLI>) {
			chomp;
			my($mcall,$mlabl)=split;
			$mli{hex($mcall)}=$mlabl;
		}
		$mlisyms=1;
		close(MLI);
	} else {
		$mlisyms=0;
	}
}

# main loop, reads file (or STDIN) one insn at a time
open(FH,"<$file") or die("open($file): $!\n");

# throw away the first $opt{o} bytes of file, if needed
if($byteskip>0) {
	$count=read(FH,$dummy,$byteskip);
}

# get the opcode; determine insnlen from that
$count=read(FH,$opcode,1);

# loop over the entire file until eof
while($count!=0) {

	# get insnlen out of the hash, -- it to get operand length
	if($mli) {
		if($mlicall==0) {
			$mlicall=256;
			$insnlen=0;
		} else {
			$mlicall++;
			$insnlen=1;
		}
	} else {
		$mlicall=0;
		$insnlen=$ops{ord($opcode)}->[2];
		$insnlen--;
	}
#	printf("MLI state is $mli, MLI call is $mlicall\n");

	if($insnlen>0) {

		# insn has an operand, read however many bytes long it is
		$count=read(FH,$chunk,$insnlen);

		# form the chunk; this is our insn to disassemble
		$chunk=$opcode.$chunk;

		if($count<$insnlen) {

			# short read; pad to insnlen with \000
			$offset=$insnlen-$count;
			$chunk.="\000" x $offset;

			# and signal eof
			$count=0;
		}
	} else {
		# insn with no operand
		$chunk=$opcode;
	}

	# disassemble one insn
	print uc(sprintf("%04x",$pc)), "-   ";	# "E000-   "
	bytes($chunk);							# "A0 AE      "
	opcode($chunk);							# "LDY  #$AE"
	ascii($chunk);							# "| . |"

	# and get more insns from the file if not eof
	if($mli) {
		$mli=0 if($mlicall==257);
	} else {
		$mli=$mlistart;
		$mli=0 if($nomli==1);
	}
	$count=read(FH,$opcode,1) if($count>0);
}

close(FH) or warn("close($file): $!\n");

###
# subroutines
###

# print the bytes for this insn, left-justified in 11-char field
sub bytes {
	my $bytes=shift;

	# get opcode and insnlen to drive how much we'll print
	my $opcode=ord(substr($bytes,0,1));
	$opcode=$mlicall if($mli);
	my $insnlen=$ops{$opcode}->[2];

	my $i;
	my $fmtstr;
 
	# print out the bytes separated by " "
	for($i=0;$i<$insnlen;$i++) {
		# concatenate to the final string
		$fmtstr.=uc(sprintf("%02x ",ord(substr($bytes,$i,1))));
	}
	# determine MLI entry
	$mlistart=($fmtstr=~/^20 00 BF/) ? 1 : 0;
	# left-justify it to 11
	printf("%-11s",$fmtstr);
}

# print opcode and operand (if any)
sub opcode {
	my $bytes=shift;
	my $opcode;
	my $operlen;
	if($mli) {
		# specially-process MLI calls
		$opcode=$mlicall;
		$operlen=$mlicall-255;
	} else {
		# get opcode at current PC
		$opcode=ord(substr($bytes,0,1));
	}

	# get amode and insnlen
	my $amode=$ops{$opcode}->[1];
	my $insnlen=$ops{$opcode}->[2];

	# get operand length
	$operlen=$insnlen-1 unless($mli);

	my $operand;
	my $str;
	my $symaddr;

	# get the operand
	if($operlen==0) {
		# no operand
		$operand=" ";
	} else {
		# gather however many bytes the operand has, in reverse
		$str=reverse substr($bytes,1-$mli,$operlen);
		$operand=uc(unpack("H*",$str));

		# compute relative branch on amode=3
		$operand=reladdr($operand) if($amode==3);

		# convert operand to dec for symbol lookup
		$symaddr=hex($operand);

		# MLI call labeling
		if(($mlisyms)&&($mli)&&($mlicall==256)) {
			$operand=(defined($mli{$symaddr})) ?
				$mli{$symaddr} : '$' . $operand;
		} else {
			# operand labeling for non-MLI calls
			if(($amode!=7)&&(!$mli)) {
				# not immediate mode, so look up symbol and
				# replace operand if found
				$operand=(defined($sym{$symaddr})) ?
					$sym{$symaddr} : '$' . $operand;
			} else {
				# symbols don't make sense in immediate mode
				$operand='$' . $operand;
			}
		}
	}

	# label insn at PC if it has a symbol
	if(defined($symfile)) {
		my $labl=defined($sym{$pc}) ? $sym{$pc} : " ";
		printf("%-16s", $labl);
	}

	# print out the mnemonic
	printf("%-8s",$ops{$opcode}->[0]);

	# print operand in appropriate amode
	if(exists($am{$amode})) {
		# print in the right "other than absolute" mode
		$str=$am{$amode}->($operand);
	} else {
		# the operand has no modifiers
		$str=$operand;
	}
	printf("%-20s",$str);

	# insn complete; adjust the PC, roll over at 16bits if needed
	$pc+=$insnlen;
	$pc-=65536 if($pc>65535);
}

# print chunk in ASCII
sub ascii {
	my $bytestr=shift;
	my $char;
	my $ascii;

	# sanitize the chunk for display
	foreach my $byte (0..length($bytestr)-1) {

		# map the char to the lower 128
		$char=ord(substr($bytestr,$byte,1)) & 127;

		# print ctrl chars and 0x7f as "."
		if(($char==127) or ($char<32)) {
			$char=46; # ord(".")
		}

		# add it to the chars to print
		$ascii.=chr($char);
	}

	# print it left justified surrounded by "|"
	printf("|%-3s|\n",$ascii);
}

# compute relative branch
sub reladdr {
	my $offset=shift;
	my $addr;

	# work with offset in dec
	$offset=hex($offset);

	# take care of negative branch
	$offset-=256 if($offset>127);

	# branch is relative from start of NEXT insn, so add 2
	$addr=$pc+$offset+2;

	# roll (over|back) if needed
	$addr+=65536 if($addr<0);
	$addr-=65536 if($addr>65535);

	# convert to hex and pad
	return uc(sprintf("%04x",$addr));
}

###
# all these are for address mode printing
# accessed via the subroutine hash %am
###

# a "$FOO,X" operand
sub am_abx {
	my $operand=shift;
	return "${operand},X";
}

# a "$FOO,Y" operand
sub am_aby {
	my $operand=shift;
	return "${operand},Y";
}

# a "#$FOO" operand
sub am_imm {
	my $operand=shift;
	return "#${operand}";
}

# a "($FOO)" operand
sub am_ind {
	my $operand=shift;
	return "($operand)";
}

# a "($FOO,X)" operand
sub am_idx {
	my $operand=shift;
	return "(${operand},X)";
}

# a "($FOO),Y" operand
sub am_idy {
	my $operand=shift;
	return "($operand),Y";
}


