Code: Select all
;======================================================
; Bit Counting
;------------------------------------------------------
; This file contains various 6502 coded algorithms that
; count the number of one bits in a byte value.
;
; The different CPU performance and memory usage
; characteristics have been calculated for each method
; to show how speed and space trade offs.
;
; These examples have been developed for use with
; Michal Kowalski's 6502 emulator.
;
; If you can think of any other ways of calculating
; this or speeding up these routines please let me
; know.
;
; Andrew Jacobs
;======================================================
; Revision History:
; 2007-08-28 AJ First release.
;------------------------------------------------------
.ORG $00
SCRATCH .DS 2 ; Some scratch memory
.ORG $8000
.START $8000
;======================================================
; Bit Counting Via a Lookup Table
;------------------------------------------------------
; The fastest way of figuring out how many bits are set
; in a byte is to use a precalculated table and index
; into it using either X or Y.
;
; This method is so simple that you don't need to put
; the code in a subroutine for reuse.
;
; The only downside to this is that it needs a whole
; 256 byte page to hold the lookup table.
;
; Time: 6 cycles (+1 if data table crosses page)
; Size: 256 bytes (for data)
ByteAtATime:
LDX #54 ; Load value to lookup
LDA ByteBitCounts,X ; and get its bit count
NOP ; A contains bit count
; We can add together the bit counts of several bytes
; to compute the total number of bits in larger values.
LDA #$12 ; Set up an example
STA SCRATCH+0 ; 16-bit value
LDA #$34
STA SCRATCH+1
CLC ; Clear carry
LDX SCRATCH+0 ; Fetch first data byte
LDA ByteBitCounts,X ; bit count
LDX SCRATCH+1 ; And add in the next
ADC ByteBitCounts,X
NOP ; A has bit total count
JMP HalfTheDataTable ; Jump to next method
; Look up table of bit counts in the values $00-$FF
ByteBitCounts:
.BYTE 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
.BYTE 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
.BYTE 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
;======================================================
; Bit Counting With A Smaller Lookup Table
;------------------------------------------------------
; We can half the size of the look up table by using
; the fact that the bit count for a value V in the
; range $80-$FF will be one more than that for V & $7F.
;
; So if we move bit 7 into the carry then look up the
; bit count for bits 6 downto 0 and add the carry back
; we get the right result.
;
; Time: 20 cycles (+1 if data table crosses page)
; Size: 142 bytes (13 for code, 128 for data)
HalfTheDataTable:
LDA #$54 ; Load a test value
JSR BitCountSeven ; Calculate the count
NOP ; Result is in A
JMP NybbleAtATime ; Jump to next method
;------------------------------------------------------
BitCountSeven:
CLC ; Ensure bit 0 is safe
ADC #$80 ; Put bit 7 into carry
AND #$7F ; Strip out bits 6-0
TAX
LDA SevenBitCounts,X ; Fetch the bit count
ADC #0 ; Add in bit 7
RTS
; Look up table of bit counts in the values $00-$7F
SevenBitCounts:
.BYTE 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
.BYTE 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
;======================================================
; Bit Counting With An Even Smaller Lookup Table
;------------------------------------------------------
; If we are prepared to do a small amount of shifting
; then we can reduce the data table to just 16 bytes.
;
; This alogrithm breaks the value into two $0-$F nybble
; values and adds the bit counts for these together.
;
; Time: 39 cycles (+2 if data table crosses page)
; Size: 36 bytes (20 for code, 16 for data)
NybbleAtATime:
LDA #$54 ; Load a test value
JSR BitCountNybble ; Calculate the count
NOP ; Result is in A
JMP BitAtATime ; Jump to next method
;------------------------------------------------------
BitCountNybble:
PHA ; Save a copy of value
LSR ; Shift down hi nybble
LSR
LSR
LSR
AND #$0F
TAY ; And save in Y
PLA ; Recover value
AND #$0F ; Strip out lo nybble
TAX ; And save in X
CLC
LDA NybbleBitCounts,X ; Then add counts for
ADC NybbleBitCounts,Y ; each nybble
RTS
; Look up table of bit counts in the values $00-$0F
NybbleBitCounts:
.BYTE 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4
;======================================================
; Bit Counting Using Shifts
;------------------------------------------------------
; The smallest method for count bits shifts the value
; left one bit at a time and counts each one bit that
; is shifted out.
;
; If we use ASL to perform the shift then the value
; will become zero after at most eight iterations. By
; looking for see when the value has reached zero we
; can stop iterating as soon as there are no bits left
; to shift out.
;
; Time: 17 to 91 cycles (depending on the value)
; Size: 12 bytes
BitAtATime:
LDA #$54 ; Load a test value
JSR BitCountIterative ; Calculate the count
NOP ; Result is in A
JMP DivideAndConquer ; Jump to next method
;------------------------------------------------------
BitCountIterative:
LDX #0 ; Clear bit count
BitShiftLoop:
ASL ; Shift a bit
BCC BitCountSkip ; Did a one shift out?
INX ; Add one to count
ORA #0 ; Retest for zero
BitCountSkip:
BNE BitShiftLoop ; Repeat till zero
TXA ; Move count to A
RTS
;======================================================
; Divide and Conquer
;------------------------------------------------------
; This alogrithm works by aligning and adding parts of
; the value until finally a bit count is produced.
;
; +--------------------------------
; | 0 + 1 | 1 + 0 | 0 + 1 | 1 + 1 |
; +---v-------v-------v-------v----
; | 0 1 + 0 1 | 0 1 + 1 0 |
; +-------v---------------v--------
; | 0 0 1 0 + 0 0 1 1 |
; +---------------v----------------
; | 0 0 0 0 0 1 0 1 |
; +--------------------------------
;
; This method always takes the same amount of time
; regardless of the value
;
; Time: 66 cycles
; Size: 37 bytes
DivideAndConquer:
LDA #$54 ; Load a test value
JSR BitCountParallel ; Calculate the count
NOP ; Result is in A
BRK ; All done
;------------------------------------------------------
BitCountParallel:
PHA
AND #$55 ; Strip out odd bits
STA SCRATCH
PLA
LSR ; Strip out even bits
AND #$55
CLC ; And add together
ADC SCRATCH
PHA
AND #$33 ; Strip out odd pairs
STA SCRATCH
PLA
LSR
LSR
AND #$33 ; Strip out even pairs
CLC
ADC SCRATCH ; And add together
STA SCRATCH
LSR ; Shift down hi nybble
LSR
LSR
LSR
CLC
ADC SCRATCH ; Add to lo nybble
AND #$0F ; And prune to result
RTS
.END