scotws wrote:
Again, please do comment on anything you see -- even if I decide not to include it, I'm sure to learn something!
Okay, I went through the first 1000 lines of your Tali-Forth.asm source, and saved you 87 bytes without any apparent side-effects, assuming that register Y is free for use by the primitives and TMPCNT and TMPADR aren't used to transmit data between primitives. Tali-Forth.asm is over 7000 lines long, so I could probably save you about 520 bytes more (by extrapolation) in that file alone. It's getting late here, and I have to get to work tomorrow, but you should get the idea of what I'm trying to do (these suggestions are untested).
In Tali-Forth.asm:
In f_toupper (saves 1 byte)
change:
Code:
sec
sbc #$20 ; offset to upper case
to:
Code:
adc #'A-'a ; offset to upper case (relies on carry being clear here)
In f_strtoupper (saves 1 byte)
delete:
Code:
lda 1,x ; LSB of u, we ignore MSB
sta TMPCNT
and change:
Code:
ldy TMPCNT
to:
Code:
ldy 1,x ; LSB of u, we ignore MSB
dey ; we don't want to convert the byte
; following the end of our string!
In _parseword (saves 1 byte)
change:
Code:
ldy #$00
lda #$20 ; opcode for JSR
sta (CP),y
iny
to:
Code:
lda #$20 ; opcode for JSR
sta (CP)
ldy #1
In _execute (saves 1 byte)
change:
Code:
; Forth commands end with a RTS instruction. We fake the
; return address by pushing the correct address to the
; 65c02's stack and then doing a normal JMP. When we return,
; we land on a NOP so we don't have to DEC the return address
lda #>_doneexec ; push MSB first
pha
lda #<_doneexec
pha
to:
Code:
; Forth commands end with a RTS instruction. We fake the
; return address by pushing the correct address - 1 to the
; 65c02's stack and then doing a normal JMP. When we return,
; we land on the correct address due to the nature of RTS.
lda #>(_doneexec-1) ; push MSB first
pha
lda #<(_doneexec-1)
pha
and change:
Code:
_doneexec: ; Keep the NOP here as the landing site for the indirect
; subroutine jump (easier and quicker than adjusting the
; return address on the 65c02's stack)
nop
to:
Code:
_doneexec: ; Landing site for the indirect jump
In _compile (saves 1 byte)
change:
Code:
cmp #$01
beq _execute
to:
Code:
dec
beq _execute
In f_nib2asc (saves 14 bytes)
change:
Code:
phx
and #$0F
tax
lda hexstr,x
jsr f_putchr
plx
to:
Code:
and #$0F
ora #'0
cmp #'9+1
bcc +
adc #6 ; carry is set after the CMP, so this adds 7: '9+1 -> 'A
* jmp f_putchr
and delete:
Code:
hexstr: .byte "0123456789ABCDEF"
at the end of the file (it was just a duplicate of the beginning of alphastr anyway).
In f_putchr (saves 7 bytes)
change:
Code:
; PORT 0: DEFAULT, Terminal, ASCI
jsr k_wrtchr
bra _done
_c1: cpy #$01
bne _c2
; PORT 1: VIA Port A output
jsr k_wrtchrVIAa
bra _done
_c2: cpy #$02
bne _err
; PORT 2: VIA Port B output
jsr k_wrtchrVIAb
bra _done
_err: lda #$08 ; string code for unknown channel
jmp error
_done: ply
rts
to:
Code:
; PORT 0: DEFAULT, Terminal, ASCI
ply
jmp k_wrtchr
_c1: dey
bne _c2
; PORT 1: VIA Port A output
ply
jmp k_wrtchrVIAa
_c2: dey
bne _err
; PORT 2: VIA Port B output
ply
jmp k_wrtchrVIAb
_err: lda #$08 ; string code for unknown channel
jmp error
In f_prtzerostr (saves 7 bytes)
change:
Code:
f_wrtzerostr: ; version without a final linefeed
ldy #$00
phy
bra _common
f_prtzerostr: ; version with a final linefeed
ldy #$FF
phy
_common: phx ; use X as indext to table
asl
tax
lda strtbl,x
sta TBLLOC
inx
lda strtbl,x
sta TBLLOC+1
plx
ldy #$00
* lda (TBLLOC),y
beq _linefeed
jsr f_putchr
iny
bra -
_linefeed: ; get flag to see if we print a final linefeed or not
ply
beq _done
; print a line feed
lda #AscLF
jsr f_putchr
_done: rts
to:
Code:
f_prtzerostr: ; version with a final linefeed
ldy #$FF
.byte $2C ; BIT abs opcode (skip next 2-byte instr.)
f_wrtzerostr: ; version without a final linefeed
ldy #$00
phy
_common: asl
tay ; use Y as index to table
lda strtbl,y
sta TBLLOC
iny
lda strtbl,y
sta TBLLOC+1
ldy #$00
* lda (TBLLOC),y
beq _linefeed
jsr f_putchr
iny
bra -
_linefeed: ; get flag to see if we print a final linefeed or not
lda #AscLF
ply
bne f_putchr ; print a line feed
rts
In f_getchr (saves 7 bytes)
change:
Code:
; PORT 0: DEFAULT, Terminal, ASCI
jsr k_getchr
bra _done
_c1: cpy #$01
bne _c2
; PORT 1: VIA Port A input
jsr k_getchrVIAa
bra _done
_c2: cpy #$02
bne _err
; PORT 2: VIA Port B input
jsr k_getchrVIAb
bra _done
_err: lda #$08 ; string code for wrong channel
jmp error
_done: ply
rts
to:
Code:
; PORT 0: DEFAULT, Terminal, ASCI
ply
jmp k_getchr
_c1: dey
bne _c2
; PORT 1: VIA Port A input
ply
jmp k_getchrVIAa
_c2: dey
bne _err
; PORT 2: VIA Port B input
ply
jmp k_getchrVIAb
_err: lda #$08 ; string code for wrong channel
jmp error
In f_cmp16 (saves 2 bytes)
change:
Code:
; low bytes are not equal, compare MSB
lda 2,x ; MSB of TOS
sbc 4,x ; MSB of NOS
ora #$01 ; Make Zero Flag 0 because we're not equal
bvs _overflow
bra _done
_equal: ; low bytes are equal, so we compare high bytes
lda 2,x ; MSB of TOS
sbc 4,x ; MSB of NOS
bvc _done
_overflow: ; handle overflow because we use signed numbers
eor #$80 ; complement negative flag
ora #$01 ; if overflow, we can't be equal
_done: rts
to:
Code:
; low bytes are not equal, compare MSB
lda 2,x ; MSB of TOS
sbc 4,x ; MSB of NOS
bvs _overflow
bra _notequal
_equal: ; low bytes are equal, so we compare high bytes
lda 2,x ; MSB of TOS
sbc 4,x ; MSB of NOS
bvc _done
_overflow: ; handle overflow for signed numbers
eor #$80 ; complement negative flag
_notequal:
ora #$01 ; clear zero flag
_done: rts
In fc_dovar (saves 10 bytes)
change:
Code:
fc_dovar: ; pull return address off of the machine's stack
pla ; LSB of return address
sta TMPADR2
pla ; MSB of return address
sta TMPADR2+1
; The address is one byte below this
inc TMPADR2
bne +
inc TMPADR2+1
; get variable and push it on the stack
* dex
dex
lda TMPADR2 ; LSB
sta 1,x
lda TMPADR2+1 ; MSB
sta 2,x
to:
Code:
fc_dovar: ; pull return address off of the machine's stack,
; add 1 to make up for JSR idiosyncrasy, and push
; it on the forth stack
dex
dex
pla ; LSB of return address
ply ; MSB of return address
inc ; add 1 to make it correct
sta 1,x
bne +
iny
* sty 2,x
In a_plit (saves 4 bytes)
change:
Code:
; move up one address so we are pointing to the byte after
; the JSR command.
inc TMPADR
bne +
inc TMPADR+1
* ; get bytes after JSR address
lda (TMPADR) ; LSB
sta 1,x
inc TMPADR
bne +
inc TMPADR+1
* lda (TMPADR) ; LSB
sta 2,x
; replace the new address on the stack
lda TMPADR+1
pha
lda TMPADR
pha
to:
Code:
ldy #1
; get bytes after JSR address
lda (TMPADR),y ; LSB
sta 1,x
iny
lda (TMPADR),y ; MSB
sta 2,x
; replace the new address on the stack
tya
clc
adc TMPADR
tay
lda TMPADR+1
adc #0
pha
phy
In a_lit (saves 2 bytes)
change:
Code:
a_lit: ldy #$00
; we first compile the call to (LITERAL)
lda #$20 ; opcode for the JSR instruction
sta (CP),y
iny
lda #<l_plit
sta (CP),y
iny
lda #>l_plit
sta (CP),y
iny
; bookkeeping: update CP
tya
clc
adc CP
to:
Code:
a_lit: ldy #$01
; we first compile the call to (LITERAL)
lda #$20 ; opcode for the JSR instruction
sta (CP)
lda #<l_plit
sta (CP),y
iny
lda #>l_plit
sta (CP),y
; bookkeeping: update CP
tya
sec ; carry set supplies the +1, so A = LSB of CP + 3
adc CP
In a_quit (saves 2 bytes)
change:
Code:
a_quit: ; Reset the return stack (65c02 stack) pointer
stx TMPX
ldx #RP0
txs
ldx TMPX
to:
Code:
a_quit: ; Reset the return stack (65c02 stack) pointer
txa
ldx #RP0
txs
tax
In a_dump (saves 27 bytes)
change:
Code:
a_dump: ; if we were given zero bytes to display, abort the whole
; thing
lda 1,x
ora 2,x
beq _done
; start a new line
jsr l_cr
; put stack parameters where we can work with them
lda 1,x
sta TMPCNT ; this is the counter LSB
lda 2,x
sta TMPCNT+1 ; MSB
lda 3,x
sta TMPADR ; LSB
lda 4,x
sta TMPADR+1 ; MSB
; start internal counter so we only display 16 numbers
; per row
ldy #$00
_loop: ; dump the contents
lda (TMPADR)
jsr f_byte2hexasc
jsr l_space
iny
cpy #$10
bne _nextchar
; start next line
jsr l_cr
ldy #$00
_nextchar: ; next char
inc TMPADR
bne _counter
inc TMPADR+1
_counter: ; loop counter
lda TMPCNT
bne +
dec TMPCNT+1
* dec TMPCNT
; loop control
lda TMPCNT
ora TMPCNT+1
bne _loop
to:
Code:
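a_dump: ; start a new line and the internal counter for 16 numbers
; per row (note: unlike the original, the CR is printed even
; when zero bytes were requested)
jsr l_cr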
ldy #16
_loop: ; if there are zero bytes left to display, we're done
lda 1,x
ora 2,x
beq _done
; dump the contents
lda (3,x)
jsr f_byte2hexasc
jsr l_space
_nextchar: ; next char
inc 3,x
bne _counter
inc 4,x
_counter: ; loop counter
lda 1,x
bne +
dec 2,x
* dec 1,x
dey
bne _loop
bra a_dump ; row is full: start the next line and reset the counter
HTH,
Mike B.
[Edit: Made a few minor corrections.]
[Edit #2: I totally messed up a_plit ... I'll fix it later ... a_dump sure kicks butt, though, don't it?]
[Edit #3: I fixed a_plit ... I think.]
[Edit #4: ina -> inc , dea -> dec]