Can 8080 code be smaller and faster than Z80 code?
Much has been said about how the new instructions added to the Z80 are often too slow to be useful, and that they are more commonly used to make code compact than to make it run faster.
I have a subroutine for which a version written for the 8080 is both smaller and quicker than the Z80 original: 167 bytes versus 195 in the listings below. It is used by CP/M Turbo Pascal programs to release allocated memory back to the heap. The 8080 version is not just marginally faster; it is substantially faster.
I did not originally set out to write a faster implementation but had to reach repeatedly into my bag of tricks to avoid running out of registers while trying to do a literal translation. The result ended up surprisingly more efficient.
The original Z80 code:
Code:
07241 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
07242 ; (Z80 version)
07243 ; Dispose - link a block into the free list by address, then try to merge it with its list neighbours
07244 ; Free block header as used below: +0,+1 = address of next block, +2,+3 = size
07245 ; Input:
07246 ; HL = size of block to free
07247 ; Top of stack = the address of variable containing block address (just beneath the return address)
07248 ; Uses: L_00DE = first free block, L_00F0 = rounded size (scratch), L_00C4 = top of heap
1D7B 07249 L_1D7B:
1D7B EB [4] 07250 ex DE,HL ; Move size to DE
07251
1D7C E1 [10] 07252 pop HL ; Get return address
1D7D E3 [19] 07253 ex (SP),HL ; Swap for address of variable; the return address is now on top of the stack
07254
1D7E 7E [7] 07255 ld A,(HL) ; Get address of the block to free (low byte first)
1D7F 23 [5] 07256 inc HL
1D80 66 [7] 07257 ld H,(HL)
1D81 6F [4] 07258 ld L,A ; HL = block to free
07259
1D82 13 [5] 07260 inc DE ; Round size up to next multiple of 4
1D83 13 [5] 07261 inc DE
1D84 13 [5] 07262 inc DE
1D85 7B [4] 07263 ld A,E
1D86 E6FC [7] 07264 and 0FCh ; Drop the two low bits after adding 3
1D88 5F [4] 07265 ld E,A
07266
1D89 EB [4] 07267 ex DE,HL ; Save "quantized" size
1D8A 22 00F0 [16] 07268 ld (L_00F0),HL ; DE -> block to free
07269
1D8D 2A 00DE [16] 07270 ld HL,(L_00DE) ; Point to the first free block
1D90 E5 [11] 07271 push HL ; Copy it to IX as well
1D91 DDE1 [14] 07272 pop IX
07273
1D93 B7 [4] 07274 or A ; Is block to free lower? (or A clears carry for the sbc)
1D94 ED52 [15] 07275 sbc HL,DE ; first free block - block to free
1D96 3052 (1DEA) [7/12] 07276 jr NC,L_1DEA ; Yes - no borrow, so the first free block is not below it
07277
1D98 07278 L_1D98: ; Search loop: IX = a free block that lies below the block to free
1D98 DD6E00 [19] 07279 ld L,(IX+0) ; Get next address
1D9B DD6601 [19] 07280 ld H,(IX+1)
1D9E E5 [11] 07281 push HL ; Keep a copy; the sbc below destroys HL
07282
1D9F B7 [4] 07283 or A ; Is this block still lower?
1DA0 ED52 [15] 07284 sbc HL,DE
1DA2 3004 (1DA8) [7/12] 07285 jr NC,L_1DA8 ; No
07286
1DA4 DDE1 [14] 07287 pop IX ; Point to next free block
07288
1DA6 18F0 (1D98) [12] 07289 jr L_1D98 ; Keep looking
07290
1DA8 07291 L_1DA8: ; Insert between IX (previous block) and the block on the stack
1DA8 E1 [10] 07292 pop HL ; Get block just above block to free
07293
1DA9 D5 [11] 07294 push DE ; Point to block to be freed
1DAA FDE1 [14] 07295 pop IY
07296
1DAC ED4B 00F0 [20] 07297 ld BC,(L_00F0) ; Retrieve block size
07298
1DB0 FD7102 [19] 07299 ld (IY+2),C ; Store size
1DB3 FD7003 [19] 07300 ld (IY+3),B
1DB6 FD7500 [19] 07301 ld (IY+0),L ; Store next
1DB9 FD7401 [19] 07302 ld (IY+1),H
07303
1DBC DD7300 [19] 07304 ld (IX+0),E ; Point here from previous block
1DBF DD7201 [19] 07305 ld (IX+1),D
07306
1DC2 DDE5 [15] 07307 push IX ; Point to previous block
1DC4 E1 [10] 07308 pop HL
1DC5 DD4E02 [19] 07309 ld C,(IX+2) ; Get its size
1DC8 DD4603 [19] 07310 ld B,(IX+3)
1DCB CD 1E05 [17] 07311 call L_1E05 ; Try to coalesce previous block (HL/IX, size BC) with the freed block (DE)
1DCE 2809 (1DD9) [7/12] 07312 jr Z,L_1DD9 ; Combined? Yes - IX already points at the merged block
07313
1DD0 DD5E00 [19] 07314 ld E,(IX+0) ; Point to next block (the freed block, linked in above)
1DD3 DD5601 [19] 07315 ld D,(IX+1)
1DD6 D5 [11] 07316 push DE
1DD7 DDE1 [14] 07317 pop IX
07318
1DD9 07319 L_1DD9:
1DD9 DDE5 [15] 07320 push IX ; Set up to try combining with next block
1DDB E1 [10] 07321 pop HL
1DDC DD4E02 [19] 07322 ld C,(IX+2) ; Load size
1DDF DD4603 [19] 07323 ld B,(IX+3)
1DE2 DD5E00 [19] 07324 ld E,(IX+0) ; Load next
1DE5 DD5601 [19] 07325 ld D,(IX+1)
1DE8 181B (1E05) [12] 07326 jr L_1E05 ; Jump, not call: the ret in L_1E05 returns to Dispose's caller
07327
1DEA 07328 L_1DEA:
1DEA 2A 00DE [16] 07329 ld HL,(L_00DE) ; Freed block is now first free block
1DED ED53 00DE [20] 07330 ld (L_00DE),DE
1DF1 D5 [11] 07331 push DE ; IX = freed block
1DF2 DDE1 [14] 07332 pop IX
1DF4 DD7500 [19] 07333 ld (IX+0),L ; Store next
1DF7 DD7401 [19] 07334 ld (IX+1),H
1DFA ED4B 00F0 [20] 07335 ld BC,(L_00F0) ; Store size
1DFE DD7102 [19] 07336 ld (IX+2),C
1E01 DD7003 [19] 07337 ld (IX+3),B
1E04 EB [4] 07338 ex DE,HL ; HL = freed block, DE = old first free block; fall into L_1E05
07339
1E05 07340 L_1E05: ; Coalesce. In: HL = IX = lower block, BC = its size, DE = next block. Out: NZ = not adjacent
1E05 09 [11] 07341 add HL,BC ; Are the blocks adjacent?
1E06 B7 [4] 07342 or A ; Clear the carry left by the add
1E07 ED52 [15] 07343 sbc HL,DE ; Zero when lower block + size = next block
1E09 C0 [5/11] 07344 ret NZ ; No
07345
1E0A D5 [11] 07346 push DE ; IY = upper block
1E0B FDE1 [14] 07347 pop IY
07348
1E0D 2A 00C4 [16] 07349 ld HL,(L_00C4) ; Is upper block at the top of the heap?
1E10 B7 [4] 07350 or A
1E11 ED52 [15] 07351 sbc HL,DE
1E13 281B (1E30) [7/12] 07352 jr Z,L_1E30 ; Yes
07353
1E15 FD7E00 [19] 07354 ld A,(IY+0) ; Transfer next to lower block
1E18 DD7700 [19] 07355 ld (IX+0),A
1E1B FD7E01 [19] 07356 ld A,(IY+1)
1E1E DD7701 [19] 07357 ld (IX+1),A
1E21 FD6E02 [19] 07358 ld L,(IY+2) ; Add the sizes
1E24 FD6603 [19] 07359 ld H,(IY+3)
1E27 09 [11] 07360 add HL,BC
1E28 DD7502 [19] 07361 ld (IX+2),L
1E2B DD7403 [19] 07362 ld (IX+3),H
1E2E AF [4] 07363 xor A ; Set Z flag (A = 0): tells the caller the blocks were combined
07364
1E2F C9 [10] 07365 ret
07366
1E30 07367 L_1E30: ; Lower block ends at the top of the heap: it becomes the new heap top
1E30 DDE5 [15] 07368 push IX ; Set new top of heap
1E32 E1 [10] 07369 pop HL
1E33 22 00C4 [16] 07370 ld (L_00C4),HL
07371
1E36 0604 [7] 07372 ld B,4 ; Zero the header
07373
1E38 07374 L_1E38:
1E38 3600 [10] 07375 ld (HL),0
1E3A 23 [5] 07376 inc HL
1E3B 10FB (1E38) [8/13] 07377 djnz L_1E38
07378
1E3D C9 [10] 07379 ret ; Z is still set from the sbc above (nothing since then alters flags); IX still = block base
As rewritten for the 8080:
Code:
05810 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
05811 ; (8080 version)
05812 ; Dispose - link a block into the free list by address, then try to merge it with its list neighbours
05813 ; Free block header as used below: +0,+1 = address of next block, +2,+3 = size
05814 ; Input:
05815 ; HL = size of block to free
05816 ; Top of stack = the address of variable containing block address (just beneath the return address)
05817 ; Uses: Vq00DE = first free block, Vq00F0 = rounded size (scratch), Vq00C4 = top of heap
14E9 05818 Rq1D7B:
14E9 EB [4] 05819 xchg ; Move size to DE

14EA E1 [10] 05821 pop H ; Get return address
14EB E3 [18] 05822 xthl ; Swap for address of variable; the return address is now on top of the stack

14EC 7E [7] 05824 mov A,M ; Get address of the block to free (low byte first)
14ED 23 [5] 05825 inx H
14EE 66 [7] 05826 mov H,M
14EF 6F [5] 05827 mov L,A ; HL = block to free

14F0 13 [5] 05829 inx D ; Round size up to next multiple of 4
14F1 13 [5] 05830 inx D
14F2 13 [5] 05831 inx D
14F3 7B [5] 05832 mov A,E
14F4 E6 FC [7] 05833 ani 0FCh ; Drop the two low bits after adding 3
14F6 5F [5] 05834 mov E,A

14F7 EB [4] 05836 xchg ; Save "quantized" size
14F8 22 00F0 [16] 05837 shld Vq00F0 ; DE -> block to free

14FB 2A 00DE [16] 05839 lhld Vq00DE ; Point to the first free block

14FE 7D [5] 05841 mov A,L ; Is block to free lower? (16-bit HL - DE; only the carry is used)
14FF 93 [4] 05842 sub E
1500 7C [5] 05843 mov A,H
1501 9A [4] 05844 sbb D
1502 DA 151E [10] 05845 jc Lq1D98 ; No - first free block is below it; HL is intact for the search

1505 2A 00F0 [16] 05847 lhld Vq00F0 ; Load size into BC
1508 44 [5] 05848 mov B,H
1509 4D [5] 05849 mov C,L

150A 2A 00DE [16] 05851 lhld Vq00DE ; Freed block is now first free block
150D EB [4] 05852 xchg ; HL = freed block, DE = old first free block
150E 22 00DE [16] 05853 shld Vq00DE

1511 73 [7] 05855 mov M,E ; Store next
1512 23 [5] 05856 inx H
1513 72 [7] 05857 mov M,D

1514 23 [5] 05859 inx H ; Store size
1515 71 [7] 05860 mov M,C
1516 23 [5] 05861 inx H
1517 70 [7] 05862 mov M,B
1518 2B [5] 05863 dcx H ; Step HL back to the start of the block
1519 2B [5] 05864 dcx H
151A 2B [5] 05865 dcx H

151B C3 1554 [10] 05867 jmp Lq1E05 ; Try to coalesce with next block (HL = block, BC = size, DE = next)

151E 05869 Lq1D98: ; Search loop: HL = a free block that lies below the block to free
151E 44 [5] 05870 mov B,H ; Remember current block address
151F 4D [5] 05871 mov C,L

1520 7E [7] 05873 mov A,M ; Get next address
1521 23 [5] 05874 inx H
1522 66 [7] 05875 mov H,M
1523 6F [5] 05876 mov L,A

1524 93 [4] 05878 sub E ; Is this block still lower? (A still holds the low byte of next)
1525 7C [5] 05879 mov A,H
1526 9A [4] 05880 sbb D
1527 DA 151E [10] 05881 jc Lq1D98 ; Yes

152A E5 [11] 05883 push H ; Save address of block above block to free

152B 60 [5] 05885 mov H,B ; Point to block just below block to free
152C 69 [5] 05886 mov L,C

152D 73 [7] 05888 mov M,E ; Point here from previous block
152E 23 [5] 05889 inx H
152F 72 [7] 05890 mov M,D

1530 E1 [10] 05892 pop H ; Refresh address of block above block to free

1531 EB [4] 05894 xchg ; Point to block to free (DE = block above it)

1532 73 [7] 05896 mov M,E ; Store next
1533 23 [5] 05897 inx H
1534 72 [7] 05898 mov M,D

1535 EB [4] 05900 xchg ; Retrieve block size (park the write pointer in DE around the lhld)
1536 2A 00F0 [16] 05901 lhld Vq00F0
1539 EB [4] 05902 xchg ; HL = block + 1, DE = size

153A 23 [5] 05904 inx H ; Store size
153B 73 [7] 05905 mov M,E
153C 23 [5] 05906 inx H
153D 72 [7] 05907 mov M,D

153E 60 [5] 05909 mov H,B ; Point to previous block
153F 69 [5] 05910 mov L,C

1540 CD 154A [17] 05912 call Lq1DD9 ; Try to coalesce with block being freed (Lq1DD9 reloads BC and DE from the header at HL)
1543 CA 154A [10] 05913 jz Lq1DD9 ; Combined? Yes - HL is the merged block; try its next block too

1546 7E [7] 05915 mov A,M ; Point to next block (the freed block, linked in above)
1547 23 [5] 05916 inx H
1548 66 [7] 05917 mov H,M
1549 6F [5] 05918 mov L,A ; Fall through into Lq1DD9; its ret returns to Dispose's caller
05919
154A 05920 Lq1DD9: ; In: HL = block. Loads BC = its size and DE = its next, then falls into Lq1E05
154A 23 [5] 05921 inx H ; Set up to try combining with the next block
154B 23 [5] 05922 inx H
154C 23 [5] 05923 inx H
154D 46 [7] 05924 mov B,M ; Load size (reading the header from +3 down to +0)
154E 2B [5] 05925 dcx H
154F 4E [7] 05926 mov C,M
1550 2B [5] 05927 dcx H ; Load next
1551 56 [7] 05928 mov D,M
1552 2B [5] 05929 dcx H
1553 5E [7] 05930 mov E,M ; HL is back at the block base

1554 05932 Lq1E05: ; Coalesce. In: HL = lower block, BC = its size, DE = next block. Out: NZ = not adjacent, HL unchanged
1554 7D [5] 05933 mov A,L ; Are the blocks adjacent?
1555 81 [4] 05934 add C
1556 BB [4] 05935 cmp E
1557 C0 [5/11] 05936 rnz ; No - low bytes do not match
1558 7D [5] 05937 mov A,L ; Repeat the low-byte add: the cmp replaced its carry
1559 81 [4] 05938 add C
155A 7C [5] 05939 mov A,H
155B 88 [4] 05940 adc B
155C BA [4] 05941 cmp D
155D C0 [5/11] 05942 rnz ; No - high bytes do not match

155E E5 [11] 05944 push H ; Stash address of lower block

155F 2A 00C4 [16] 05946 lhld Vq00C4 ; Is upper block at the top of the heap?
1562 7D [5] 05947 mov A,L
1563 BB [4] 05948 cmp E
1564 C2 156C [10] 05949 jnz Lq1E15 ; No
1567 7C [5] 05950 mov A,H
1568 BA [4] 05951 cmp D
1569 CA 1582 [10] 05952 jz Lq1E30 ; Yes

156C 05954 Lq1E15: ; Merge the upper block (DE) into the lower block
156C E1 [10] 05955 pop H ; Recover address of lower block

156D 1A [7] 05957 ldax D ; Transfer next to lower block
156E 77 [7] 05958 mov M,A
156F 23 [5] 05959 inx H
1570 13 [5] 05960 inx D
1571 1A [7] 05961 ldax D
1572 77 [7] 05962 mov M,A
1573 23 [5] 05963 inx H
1574 13 [5] 05964 inx D

1575 1A [7] 05966 ldax D ; Add the sizes
1576 81 [4] 05967 add C
1577 77 [7] 05968 mov M,A
1578 13 [5] 05969 inx D ; inx/mov/ldax leave the carry from the add intact for the adc
1579 23 [5] 05970 inx H
157A 1A [7] 05971 ldax D
157B 88 [4] 05972 adc B
157C 77 [7] 05973 mov M,A

157D 2B [5] 05975 dcx H ; Point back to the base
157E 2B [5] 05976 dcx H
157F 2B [5] 05977 dcx H

1580 AF [4] 05979 xra A ; Set Z flag (A = 0): tells the caller the blocks were combined

1581 C9 [10] 05981 ret

1582 05983 Lq1E30: ; Lower block ends at the top of the heap: it becomes the new heap top
1582 E1 [10] 05984 pop H ; Recover address of lower block

1583 22 00C4 [16] 05986 shld Vq00C4 ; Set new top of heap

1586 06 04 [7] 05988 mvi B,4 ; Zero the header

1588 05990 Lq1E38:
1588 36 00 [10] 05991 mvi M,0
158A 23 [5] 05992 inx H
158B 05 [5] 05993 dcr B
158C C2 1588 [10] 05994 jnz Lq1E38

158F C9 [10] 05996 ret ; Z set (final dcr B), HL = block + 4. NOTE(review): the jz after "call Lq1DD9" would reuse this HL - confirm that call never takes this path