Re: Fast BCD multiplication
Posted: Thu Nov 16, 2017 1:36 pm
BigEd wrote:
Hmm, I thought it worked out neatly, but trying it out I seem to be confused.
Code: Select all
;Method 20 - Urdhva Tiryagbhyam
.IF MultiplyRoutine==20
;Comments include a workthrough for the example of 74 x 23
;(7 × 2), (7 × 3) + (4 × 2), (4 × 3)
; R0 R1 R3
; $14 $29 $12
;
; 4 9 2 Units U
; 1 2 1 Carried tens T
; 1 Traditional carry TC
; 1 7 0 2 Result 74 x 23 = 1702
SED ; Decimal mode can be set for the whole routine.
LDA Arg1 ;74 Mask out low nyble of Arg1
AND #$F0 ;7 and save it for later
STA R0 ;
STA R1 ;
LDA Arg2 ;23 Mask out low nyble of Arg2
; AND #$F0 ;2 and shift high nyble to
LSR ; units position
LSR ;
LSR ;
LSR ; 2
ORA R0 ;72 Merge the separated nybles.
TAX ;
LDA table_cz,X ; Do four bit multiply via Lookup 7x2 = 14
STA R0 ;14 Store result in R0
LDA Arg2 ;23 Mask out high nyble of Arg2
AND #$0F ; 3 and save it for later
STA R3 ; 3
ORA R1 ;73 Merge the separated nybles.Lookup(7x3) $21
TAX ;
LDA table_cz,X ; Do four bit multiply via Lookup 7x3 = 21
STA R1 ;21 Store result in R1
LDA Arg1 ;74 Mask out high nyble of Arg1
AND #$0F ; 4 and save it
STA R2 ; 4
LDA Arg2 ;23 Mask out low nyble of Arg2
AND #$F0 ;2
ORA R2 ;24 Merge with stored R2
TAX ;
LDA table_cz,X ; Do four bit multiply via Lookup 2x4 = 8
CLC
ADC R1 ; Add to stored R1 R1 = $08+$21 = $29
STA R1 ;
LDA #$00 ;
BCC .no_carry ; If result is over 99
LDA #$10 ; Carry the 100 digit over to the result
.no_carry STA ResultHi ;
LDA Arg1 ;74 Shift low nyble of Arg1 into tens position
ASL ;
ASL ;
ASL ;
ASL ;4
ORA R3 ;43 Merge with stored R3
TAX ;
LDA table_cz,X ;12 Do four bit multiply via Lookup 4x3 = 12
TAX ; Store result in X Reg, we'll need it a bit.
AND #$0F ;2 Mask high nyble
STA ResultLo ; Save first byble of final result
TXA ;12 Now we get the high nyble
LSR
LSR
LSR
LSR
STA R3 ;1 And store it in R3
LDA R1 ;29 Fetch stored R1
AND #$0F ; 9 and mask off high nyble
CLC
ADC R3 ;10 and add with R3 9 + 1 = 10
TAX ; save it in X
ASL ; shift low nyble
ASL
ASL
ASL ;0
ORA ResultLo
STA ResultLo ; Save second nyble of final result
TXA ;10
LSR
LSR
LSR
LSR ; 1 (Traditional carry)
STA R3
LDA R1 ;29 Fetch stored R1
; AND #$F0 ;20 and mask off low nyble
LSR
LSR
LSR
LSR ; 2 shift high nyble to low pos
CLC
ADC R3
STA R3 ; 3 and add to R3
LDA R0 ;14 Fetch stored R0
AND #$0F ; 4 and mask off high nyble
CLC
ADC R3 ; 7 and add to R3
TAX ; and store in X Reg
AND #$0F ; mask off high nyble
STA R3 ; store in R3
TXA ; Grab the value back for X Reg, we want the other nyble now
AND #$F0 ; mask off low nyble
STA R2 ; store in R2
LDA R0 ;14 Fetch stored R0
AND #$F0 ;1 and mask off low nyble
CLC
ADC ResultHi ; add some accumulated carries
ADC R2
ORA R3
STA ResultHi ; and store the final ResultHi
.ENDIF
Code: Select all
table_cz:
table_cz_start=*
; 0 1 2 3 4 5 6 7 8 9 |---- Address Padding ----|
.DB $00, $00, $00, $00, $00, $00, $00, $00, $00, $00, $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $02, $04, $06, $08, $10, $12, $14, $16, $18 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $03, $06, $09, $12, $15, $18, $21, $24, $27 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $04, $08, $12, $16, $20, $24, $28, $32, $36 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $05, $10, $15, $20, $25, $30, $35, $40, $45 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $06, $12, $18, $24, $30, $36, $42, $48, $54 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $07, $14, $21, $28, $35, $42, $49, $56, $63 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $08, $16, $24, $32, $40, $48, $56, $64, $72 , $FA, $FB, $FC, $FD, $FE, $FF
.DB $00, $09, $18, $27, $36, $45, $54, $63, $72, $81 , $FA, $FB, $FC, $FD, $FE, $FF
;Slightly slower code could use the sub
;table above at just 64 bytes. Perhaps
;storing it in ZeroPage could gain back
;some cycles.
;Challenge: There are still 28 duplicate
;results in the sub table, in a predictable
;pattern......I see a way......
Code: Select all
lda #$56
sta op1
lda #$78
sta op2
;********************
;* ab *
;* cd =
;* _________________
;* >bd <bd
;* >da <da
;* >cb <cb
;* >ca <ca
;* _________________
BcdMul
lda #>multab
sta pnt+1
sed
lda op1
and #$0f ; b
tay
lda op2
and #$f0 ; c <<4
sta pnt
lda (pnt),y ; c * b
sta temp
lda op2
asl
asl
asl
asl ; d <<4
sta pnt
lda (pnt),y ; d * b
sta prod+1
lda op1
lsr
lsr
lsr
lsr ; a
tay
lda (pnt),y ; d * a
clc
adc temp ; b*c + d*a
tax ; split nibbles
lsr
lsr
lsr
lsr
sta prod
txa
asl
asl
asl
asl
clc
adc prod+1
sta prod+1 ; lo byte
lda op2
and #$f0 ; c <<4
sta pnt
lda (pnt),y ; c * a
adc prod
sta prod ; hi byte
cld
rts
op1 =$61
op2 =$62
temp =$63
prod =$64
pnt=$fe
align 256
multab
byte $00, $00, $00, $00, $00, $00, $00, $00, $00, $00, $0, $0, $0, $0, $0, $0
byte $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $0, $0, $0, $0, $0, $0
byte $00, $02, $04, $06, $08, $10, $12, $14, $16, $18, $0, $0, $0, $0, $0, $0
byte $00, $03, $06, $09, $12, $15, $18, $21, $24, $27, $0, $0, $0, $0, $0, $0
byte $00, $04, $08, $12, $16, $20, $24, $28, $32, $36, $0, $0, $0, $0, $0, $0
byte $00, $05, $10, $15, $20, $25, $30, $35, $40, $45, $0, $0, $0, $0, $0, $0
byte $00, $06, $12, $18, $24, $30, $36, $42, $48, $54, $0, $0, $0, $0, $0, $0
byte $00, $07, $14, $21, $28, $35, $42, $49, $56, $63, $0, $0, $0, $0, $0, $0
byte $00, $08, $16, $24, $32, $40, $48, $56, $64, $72, $0, $0, $0, $0, $0, $0
byte $00, $09, $18, $27, $36, $45, $54, $63, $72, $81, $0, $0, $0, $0, $0, $0