Converting my Mandelbrot set program to high resolution graphics using Paul Farrow’s ARX display driver.
;
; Mandelbrot Set ARX
; v1.00 Steven Reid (c) 2024
; ARX graphics from Paul Farrow
;
; Converting my mandelbrot set to high resolution
; graphics using ARX.
;
; +++
;
; Header and startup
;
; start up stuff
org 16514 ; stored in REM at top (ZX81)
jr start ; needed for z80asm
; title and copyright (will show on listing)
copy:
db _as,_as,_m_,_b_,_s_,_a_,_r_,_x_,_as,_as
db _as,_s_,_l_,_r_,_sl,_2_,_0_,_2_,_4_,_as
db _as,$76 ; **MBSARX***SLR/2024**
sloop: db 0,0,0
scale: db 0,0,0
shift: db 0,0,0
x_val: db 0,0,0
y_val: db 0,0,0
cr_val: db 0,0,0
ci_val: db 0,0,0
zr_val: db 0,0,0
zi_val: db 0,0,0
zerox: equ 255 ; 256 pixels (0-255)
zeroy: equ 96 ; 192 pixels / 2 (0-191)
maxit: equ 15
step: equ 50
start:
; end header and startup
;
; ---
; +++
;
; Main Loop
;
; Switch to the ARX display driver.
call START_ARX_DRIVER
mainloop:
; for s=0 to 750 step 50
xor a ; set scaling loop to 0
ld hl,0
ld (sloop),a ; save scaleing loop variable
ld (sloop+1),hl
scale_loop:
; Clear the ARX display file.
ld hl,$2000
ld (hl),l
ld de,$2001
ld bc,$17ff
ldir
; scale = 1/(s+80)
ld a,(sloop) ; load scaling loop
ld hl,(sloop+1)
; .db $00,$40,$45 ;80
ld c,$45 ; load cde with 80
ld de,$4000
;; .db $00,$40,$43 ;20
;ld c,$43 ; load cde with 20
;ld de,$4000
call f24add ; s+80
ld c,a ; copy result to numerator
ld d,h
ld e,l
; .db $00,$00,$3F ;1
ld a,$3f ; set ahl to 1
ld hl,0
call f24div ; 1/(s+80) (ahl / cde)
ld (scale),a ; save result in scale
ld (scale+1),hl
; shift = -1.75*s-47
ld a,(sloop) ; load scaling loop
ld hl,(sloop+1)
;.db $00,$80,$BF ;-1.5
ld c,$bf ; -1.5
ld de,$8000
call f24mul ; -1.75*s
;.db $00,$78,$44 ;47
ld c,$44 ; 47
ld de,$7800
call f24sub ; -1.75*s-15
ld (shift),a ; save result in shift
ld (shift+1),hl
; set width loop (x)
; .db $00,$E0,$45 ;120
ld a,$45 ; 120
ld hl,$e000
ld (x_val),a ; save result in x_val
ld (x_val+1),hl
; for x = zerox to 0 step -1
ld b,zerox ; set x loop value
x_loop:
push bc
dec b
ld c,zeroy*2-1
ld a,1 ; set to plot
ld (plot_action),a
call plot_routine
; cr = (x+shift)*scale
ld a,(shift) ; load shift into cde
ld c,a
ld de,(shift+1)
ld a,(x_val) ; load x_val into ahl
ld hl,(x_val+1)
call f24add ; ahl+dce
push af
ld a,(scale) ; load scale
ld c,a
pop af
ld de,(scale+1) ; load rest of scale
call f24mul ; (x+shift)*scale
ld (cr_val),a ; save result in cr
ld (cr_val+1),hl
; set length loop (y)
xor a ; set ahl to 0
ld hl,0
ld (y_val),a ; save result in y_val
ld (y_val+1),hl
pop bc
; for y = 0 to zeroy-1
ld c,zeroy ; note starts at zeroy as we are counting down
y_loop:
push bc
; see if user wants to exit (called each run)
call check_break
; ci = (zeroy-y)*scale
ld a,(y_val) ; load cde with y_val
ld c,a
ld de,(y_val+1)
; .db $00,$80,$45 ;96
ld a,$45 ; set ahl to 96
ld hl,$8000
call f24sub ; ahl-dce (96-y)
push af
ld a,(scale) ; load scale
ld c,a
pop af
ld de,(scale+1) ; load rest of scale
call f24mul ; (zeroy-y)*scale
ld (ci_val),a ; save result in ci
ld (ci_val+1),hl
; clear zr and zi
xor a
ld hl,0
ld (zr_val),a ; zr = 0
ld (zr_val+1),hl
ld (zi_val),a ; zi = 0
ld (zi_val+1),hl
; for i = maxit to 0 step -1
ld b,maxit
iteration_loop:
push bc
; br = cr + zr*zr - zi*zi
ld a,(zr_val) ; load zr
ld hl,(zr_val+1)
ld c,a ; load zr
ld de,(zr_val+1)
call f24mul ; zr*zr
push af ; save zr*zr
push hl
ld a,(zi_val) ; load zi
ld hl,(zi_val+1)
ld c,a ; load zi
ld de,(zi_val+1)
call f24mul ; zi*zi
ld c,a ; move zi*zi to cde
ld d,h
ld e,l
pop hl
pop af ; restore zr*zr
call f24sub ; zr*zr - zi*zi
ld c,a ; load cde with zr*zr - zi*zi
ld d,h
ld e,l
ld a,(cr_val) ; load ahl with cr
ld hl,(cr_val+1)
call f24add ; cr + (zr*zr - zi*zi)
push hl
push af ; save br (will become zr)
; zi = ci + 2*zr*zi
ld a,(zi_val) ; load zi
ld c,a
ld de,(zi_val+1)
ld a,(zr_val) ; load zr
ld hl,(zr_val+1)
call f24mul ; zr*zi
; .db $00,$00,$40 ;2
ld c,$40 ; set cde to 2
ld de,0
call f24mul ; (2*zr*zi)
ld c,a ; load cde with 2*zr*zi
ld d,h
ld e,l
ld a,(ci_val) ; load ahl with ci
ld hl,(ci_val+1)
call f24add ; ci + 2*zr*zi
ld (zi_val),a ; save new zi
ld (zi_val+1),hl
; zr = br
pop af
pop hl
ld (zr_val),a ; save new zr
ld (zr_val+1),hl
ld c,a ; load zr
ld de,(zr_val+1)
call f24mul ; zr*zr
push af ; save zr*zr
push hl
ld a,(zi_val) ; load zi
ld hl,(zi_val+1)
ld c,a ; load zi
ld de,(zi_val+1)
call f24mul ; zi*zi
ld c,a ; move zi*zi to cde
ld d,h
ld e,l
pop hl
pop af ; restore zr*zr
call f24add ; zr*zr + zi*zi
; .db $00,$00,$41 ;4
ld c,$41 ; set cde to 4
ld de,0
call f24cmp ; compare!
pop bc ; restore bc
; if > 4, then plot
jp c,next_i ; ahl < cde (c) nope
jp z,next_i ; ahl = cde (z) nope
plot_point: ; yes, plot a point!
ld a,1 ; set to plot
ld (plot_action),a
jr print_pixel ; and and print it
; next i
next_i:
dec b
jp nz,iteration_loop
; passed through, so unplot
xor a ; unplot point
ld (plot_action),a
print_pixel:
pop bc ; pop x,y coordinates
push bc ; push them back for later
dec b ; sub x by 1
ld a,zeroy ; zeroy-y to (reverses plot)
sub c
ld c,a
call plot_routine
pop bc ; pop x,y coordinates
push bc ; push them back for later
dec b ; sub x by 1
ld a,zeroy-2
add a,c
ld c,a
call plot_routine
next_y:
; y_val + 1
ld a,(y_val) ; load y_val
ld hl,(y_val+1)
; .db $00,$00,$3F ;1
ld c,$3f ; set cde to 1
ld de,0
call f24add ; increment y_val
ld (y_val),a ; and save it
ld (y_val+1),hl
pop bc
dec c
jp nz,y_loop
; x_val - 1
push bc
ld a,(x_val) ; load x_val
ld hl,(x_val+1)
; .db $00,$00,$3F ;1
ld c,$3f ; set cde to 1
ld de,0
call f24sub ; decrement x_val
ld (x_val),a ; and save it
ld (x_val+1),hl
pop bc
push bc
dec b
ld c,zeroy*2-1
ld a,0 ; set to plot
ld (plot_action),a
call plot_routine
pop bc
dec b
jp nz,x_loop
ld hl,$9000 ; longer pause
call delay
; next s
; increment scaling loop
ld a,(sloop) ; load scaling loop
ld hl,(sloop+1)
;.db $00,$90,$43 ;25
ld c,$43 ; load step with 50
ld de,$9000
call f24add ; S+50
ld (sloop),a ; save scaleing loop variable
ld (sloop+1),hl
; are we done?
; .db $80,$77,$48 ;751
ld c,$48 ; load test with 751
ld de,$7780
call f24cmp
jp c,scale_loop ; not done yet! (sloop < 751)
jp mainloop ; start again!
; End of loop!
;
; +++
;
; Routines
;
; +++
; Plot Routine
;
plot_action: db 1 ; plot = 1, unplot = 0
plot_routine:
; convert coordinates from bc (yx) to de (yx)
ld d,c
ld e,b
call PLOT
ret
; end print_pixel
; ---
; ------------
; Plot a Pixel
; ------------
; Entry: D=Y coordinate (0-191).
; E=X coordinate (0-255).
; Preserves DE.
PLOT:
ld h,$01 ; The high byte of the base address of the display file divided by 32.
ld l,d ; The Y coordinate, i.e. the line number.
add hl,hl
add hl,hl
add hl,hl
add hl,hl
add hl,hl ; $2000 + (Line * 32).
ld a,e ; The X coordinate, i.e. the column/pixel number.
rrca
rrca
rrca ; Discard the pixel number.
and $1f ; Keep the column number (0-31).
or l
ld l,a ; $2000 + (Line * 32) + Column.
ld a,e ; The X coordinate.
and $07 ; Keep the pixel number (0-7).
ld b,a
inc b ; B=1-8.
ld a,$01 ; The pixel marker.
P_LOOP: rrca ; Shift the pixel into the correct bit position.
djnz P_LOOP
call TRANSLATE_DFILE_ADDRESS ; Convert from a logical display file address into an ARX display file address.
ld b,a
ld a,(plot_action)
and a
ld a,b
jr z,unplot_pixel
plot_pixel:
or (hl) ; Combine the pixel with the current display file location byte.
ld (hl),a ; Write the new pixel pattern back into the display file.
ret
unplot_pixel:
ld b,255
xor b ; Convert to a mask to remove a pixel
and (hl) ; And the mask with the current display file location byte.
ld (hl),a ; Write the new pixel pattern back into the display file.
ret
; ------------------------------------------------------------------
; Translate ARX Display File Address to Logical Display File Address
; ------------------------------------------------------------------
; Entry: HL=Logical display File address.
; Exit : HL=ARX display file address.
TRANSLATE_DFILE_ADDRESS:
rlc l ; Reorder the line and column bits.
rlc l
rlc l
ret
; ---
; ----------
; ARX Driver
; ----------
; ===========================
; Enhanced ARX Display Driver
; ===========================
; Created 13th March 2024 by Paul Farrow (comments revised 19th March 2024).
; Based on the original ARX display driver created by Andy Rea.
; You are free to use and modify this code for your own programs.
; -------------------
; ARX High Resolution
; -------------------
; Method of Operation
; -------------------
; The ARX high resolution mechanism was devised by Andy Rea in 2006 and exploits User Define Graphics (UDGs) in the 8K-16K region.
; The ZX81 hardware allows the display pixel patterns to be fetched from 0K-16K region, the 0.5K region specified by the value set
; into the I register. If RAM is populated in the 8K-16K region then the I register can be pointed at it allow UDGs. This only allows
; 64 characters to be redefined, which is enough to display a different character in every position across of 2 rows. By switching
; to a different UDG character set every 2 rows, every position within the display can be populated with its own unique character
; pattern. The ARX display driver controls the process of cycling through the different UDG character sets. The RAM populated in the
; 8K-16K region of the memory map must not be WRX enabled.
; ARX Driver Enhancements
; -----------------------
; The ARX display driver present here has a number of improvements over Andy's original ARX display driver:
; - It is robust to timing variations that can be introduced by connected devices (e.g. ZXpand) that cause vertical rotation of characters.
; - It allows faster user program operation by allowing it to run during more border lines.
; - It allows the IY and I registers to be used by the user program.
; - It outputs a VSync pulse that begins and ends aligned with the HSync pulses.
; ARX Display File Layout
; -----------------------
; Six consecutive UDG character sets form the high resolution display file. The display file is not laid out logically such that it
; progresses across the columns of the first line before moving done to the next line, and continues through all lines in this way.
; Instead the ARX display file progresses down the 8 lines for the character in row 0 column 0, then down the 8 lines for the character
; in row 0 column 1, then the lines for row 0 column 2, etc, before then moving to the start of the next row and repeating this pattern.
;
; The display file for the ARX driver presented here is 6K in size and occupies memory locations $2000 - $37FF.
;
; An address in the ARX display file is composed as follows:
;
; 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
; | 0 | 0 | 1 | R | R | R | R | R | C | C | C | C | C | L | L | L |
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
;
; R = Row number (0-23).
; C = Column number (0-31).
; L = Line number (0-7).
; Logical Display File Translation
; --------------------------------
; It can often be easier for a program to operate using a logical display file layout. Converting a logical display file address to an
; address in the ARX display file can be done by rotating the low byte leftwards to reorder the column and line bits as follows:
;
; 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
; | 0 | 0 | 1 | R | R | R | R | R | L | L | L | C | C | C | C | C |
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
;
; R = Row number (0-23).
; C = Column number (0-31).
; L = Line number (0-7).
; ----------------
; Start ARX Driver
; ----------------
START_ARX_DRIVER:
CALL AWAIT_NEW_FRAME ; Await the start of a new frame to ensure a flicker-free transition.
LD IX,ARX_DRIVER_PICTURE ; The ARX display driver will be executed upon the next activation of the display mechanism.
RET
; ---------------
; Stop ARX Driver
; ---------------
STOP_ARX_DRIVER:
CALL AWAIT_NEW_FRAME ; Await the start of a new frame to ensure a flicker-free transition.
LD A,$1E ; Revert to the standard pixel patterns in the ROM.
LD I,A
LD IX,$0281 ; The standard display driver will be executed upon the next activation of the display mechanism.
RET
; --------------------------
; Await Start of a New Frame
; --------------------------
AWAIT_NEW_FRAME:
LD HL,$4034 ; Point at the FRAMES system variable.
LD A,(HL) ; Note the initial value of the low byte of FRAMES.
ANF_LOOP:
CP (HL) ; Read the current low byte of FRAMES.
JR Z,ANF_LOOP ; Loop back until the value of FRAMES changes.
RET
; --------------------------------------------------
; Generate Main Picture Area And Begin Bottom Border
; --------------------------------------------------
; Enters here after all top border lines have been generated. First perform a delay to align with the next HSync pulse.
ARX_DRIVER_PICTURE:
LD B,$03 ; 7 Delay to ensure the LNCTR is reset when aligned with the hardware generated HSync.
ADP_DELAY:
DJNZ ADP_DELAY ; 13/8=34
; Preserve the value of the I register.
LD A,I ; 9
LD E,A ; 4 Save the current value of I, which allows the user program to use it.
; Prepare registers for exiting the driver routine.
LD A,($4028) ; 13 Fetch the number of bottom border lines from system variable MARGIN.
DEC A ; 4 Decrement the number of bottom border lines to output to compensate for the initial delay of the VSync routine.
LD D,A ; 4 Save the number of bottom border lines.
LD IX,ARX_DRIVER_VSYNC ; 14 Set the display vector address to point at the VSync pulse generation routine.
; Initialise registers required by the ARX display driver.
LD A,$20 ; 7 The first character set begins at $2000.
LD I,A ; 9
LD BC,$0C08 ; 10 B=Number of character sets. C=Number of lines in a row.
; Force the LNCTR to reset so that characters are output starting with their top line.
IN A,($FE) ; 11 Force the LNCTR to 0 aligned with the start of the hardware HSync pulse. Also sets the video output to black.
; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles.
ADP_ROW1_LOOP:
OUT ($FF),A ; 11 Set the video output back to white. Doubles as a delay on subsequent loop iterations.
LD H,$00 ; 7 Delay to ensure the picture begins with the same left border size as the standard display.
LD L,H ; 4 HL holds $0000 which ensures the CP (HL) instructions in later delays do not accidentally invoke any RAM based memory mapped devices.
LD A,I ; 9 Restore the value of I into A since this will be incremented by 2 to advance to the next character set.
L41C7: CALL DISPLAY_ROW1 + $8000 ; 17+(32*4)+10=155 Output a line of the row by 'executing' the echo of it.
DEC C ; 4 Decrement the count of the number of lines in the row.
JR Z,ADP_ROW1_DONE ; 12/7 Jump ahead if all lines of the row have been output.
JP ADP_ROW1_LOOP ; 10 Loop back to output the next line of the row.
; Move on to outputting the second row of characters for the current character set.
ADP_ROW1_DONE:
LD C,$08 ; 7 Re-initialise with the number of lines in a row.
; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles.
ADP_ROW2_LOOP:
ADD HL,HL ; 11 Delay.
ADD HL,HL ; 11 Delay.
CP (HL) ; 7 Delay. This will read from address $0000 and so will not accidentally invoke any RAM based memory mapped devices.
CALL DISPLAY_ROW2 + $8000 ; 17+(32*4)+10=155 Output a line of the row by 'executing' the echo of it.
DEC C ; 4 Decrement the count of the number of lines in the row.
JR Z,ADP_ROW2_DONE ; 12/7 Jump ahead if all lines of the row have been output.
JR ADP_ROW2_LOOP ; 12 Loop back to output the next line of the row.
; Two rows have been output using the current character set, so now move onto the next character set.
ADP_ROW2_DONE:
ADD A,$02 ; 7 Advance to the next character set.
LD I,A ; 9
LD C,$08 ; 7 Re-initialise with the number of lines in a row.
DJNZ L41C7 ; 13/8 Loop back to render the next character set.
; All lines of the main picture area have been output.
LD A,E
LD I,A ; Restore the original value of I.
LD A,D ; Retrieve the number of bottom border lines.
JP $029E ; Enable the NMI generator and return to user program, which will be interrupted as the bottom border is being generated.
; -----------------------------------------
; Generate VSync Pulse And Begin Top Border
; -----------------------------------------
; Returns here after the bottom border has been generated.
ARX_DRIVER_VSYNC:
LD B,$09 ; 7 Delay to ensure the VSync pulse begins aligned with the end of the next hardware generated HSync.
ADV_DELAY:
DJNZ ADV_DELAY ; 13/8=112
PUSH IY ; 15 Save the current value of IY, which allows the user program to use it.
LD IY,$4000 ; 14 Point at the start of the system variables, which is required by the VSync ROM routine.
CALL $0220 ; Generate the VSync pulse and then start generating the top border lines.
; Immediately returns back to here but with the NMI generator now switched on. The IX register now holds $0281.
POP IY ; Restore the original value of IY.
LD IX,ARX_DRIVER_PICTURE ; Set the display vector address to point at the main picture generation routine.
JP $02A2 ; Enable the NMI generator and return to the user program, which will be interrupted as the top border is being generated.
; ----------------------
; Display Row Characters
; ----------------------
; The two rows of characters rendered using each character set.
DISPLAY_ROW1:
DEFB $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $0A, $0B, $0C, $0D, $0E, $0F, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $1A, $1B, $1C, $1D, $1E, $1F
RET
DISPLAY_ROW2:
DEFB $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $2A, $2B, $2C, $2D, $2E, $2F, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $3A, $3B, $3C, $3D, $3E, $3F
RET
;
; Break
;
; preserves state, but will exit if SPACE is pushed
check_break:
exx ; save register states
; did the player press break key (space)?
call $0f46 ; was break pressed? (break-1 ROM routine)
jr nc,break ; no, exit as normal
exx ; restore registers
ret ; and return
; yes, exit the program as normal
break:
; Switch to the standard display driver.
call STOP_ARX_DRIVER
rst $0008 ; call ERROR-1 reset
db $ff ; with error code 0 (normal exit)
;
; Delay
;
; set bc to speed
; uses check_break to exit
delay_count: dw $0000
delay:
ld (delay_count),hl ; save delay
call check_break
; check if done
ld hl,(delay_count) ; grab what to test
dec hl ; subtract 1
ld a,h ; check if done
or l
jr nz,delay ; not zero, keep going!
ret ; pause is done!
; +++
; Floating math
;
; these are from: https://github.com/Zeda/z80float/blob/master/f24/f24div.z80
f24cmp:
;returns the flags for float AHL minus float CDE
; AHL >= CDE, nc
; AHL < CDE, c
; AHL == CDE, z (and nc)
;
;Note:
; This allows some wiggle room in the bottom two bits. For example, if the two
; exponents are the same and the two significands differ by at most 3, they are
; considered equal.
;
;Note:
; NaN is a special case. This routines returns that NaN= 15
;check if (B&7F) > 14 + (A&7F)
ld c,a ;new exponent, need to save the sign for later comparison
res 7,b
and $7f
add a,14
sub b
jr nc,f24cmp_skip_nc
xor a
ret
f24cmp_skip_nc:
;otherwise, not equal, so let's return the sign in c and nz
ld a,c
return_nz_sign_a:
or $7f
add a,a
ret
f24cmp_special:
ld a,h
or l
ccf
ret nz
;so the first op is inf
;if second of is finite, return the sign of B in carry and nz
ld a,c
and $7f
inc a
ld a,b
jp p,return_nz_sign_a
;second op is either NaN or inf
ld a,d
or e
ret nz
; op1 op2 result
; 7F 7F z, nc
; 7F FF nz,nc
; FF 7F nz,c
; FF FF z, nc
ld a,c
cp b
ret
f24add:
;AHL + CDE ==> AHL
;Destroys BC,DE
;
;save A
ld b,a
;check for special values
and $7f
jr nz,return_CDE_skip
return_CDE:
ld a,c
ex de,hl
ret
return_CDE_skip:
inc a
jp m,f24add_op1_inf_nan
ld a,c
;check for special values
and $7f
jp z,return_exp_b
inc a
jp m,return_CDE
ld a,b
xor c
jp m,f24add_subtract
;we need to add
call f24add_reorder
jr z,f24add_add_same_exp
ret nc
push bc
call rshift_1DE
sla b
adc hl,de
;if carry is reset, then we are all good :)
pop de
ld a,d
ret nc
;otherwise, we need to increment the sign and see if it overflows to inf
and $7f
cp $7e
ld a,d
jr z,f24_return_inf
inc a
;we also need to shift a 0 down into the HL
srl h
rr l
ret nc
inc hl
ret
f24add_add_same_exp:
ld a,b
and $7f
cp $7e
ld a,b
jr z,f24_return_inf
inc a
add hl,de
rr h
rr l
ret nc
inc l
ret nz
inc h
ret nz
inc a
ret
f24_return_inf:
or %01111111
ld hl,0
ret
f24add_subtract:
call f24add_reorder
jr z,f24add_subtract_same_exp
ret nc
push bc
call rshift_1DE
sub c
ld c,a
ld a,0
sbc a,b
ld b,a
sbc hl,de
;if carry is not set, then we are all good :)
pop de
ld a,d
ret nc
;otherwise, the implicit bit is set to 0, so we need to renormalize
normalize_D_HLBC:
;D is the sign+exponent
;HLBC is the significand
;returns AHLBC
;make sure HLBC is not 0
ld a,h
or l
or b
or c
ret z
ld a,d
normalize_D_HLBC_nonzero:
;save the sign
add a,a
push af
rrca
f24add_loop:
dec a
jr z,f24add_skip_ahead
sla c
rl b
adc hl,hl
jp nc,f24add_loop
;now round
sla c
ld bc,0
adc hl,bc
;if carry is set, then the implicit bit is 2, and the rest of the exponent is 0
;so we can just increment A and keep HL as 0
adc a,b
add a,a
f24add_skip_ahead:
ld d,a
pop af
ld a,d
rra
ret
f24add_subtract_same_exp:
;subtract the significands
ld a,b
; or a
sbc hl,de
;if zero, then the result is zero, but we'll keep the same sign
jr nz,f24_skip_ahead_nz
and %10000000
ret
f24_skip_ahead_nz:
;if the carry flag is set, then we need to change the sign of the output
;and negate the significand. if reset, then we still need to normalize and whatnot
ld bc,0
jr nc,normalize_D_HLBC_nonzero
xor $80
ld d,a
xor a
sub l
ld l,a
sbc a,a
sub h
ld h,a
ld a,d
jr normalize_D_HLBC_nonzero
f24add_reorder:
xor c
rlc c
rla
;Want to rearrange so that A-C>=0
sub c
ret z
jr nc,f24add_reorder_nc
neg
;A is the difference in exponents
rrc c
ld b,c
ex de,hl
f24add_reorder_nc:
;A is how many bits to shift DE right
;B is the sign+exponent of the result
or a
rra
cp 18
ret c
return_exp_b:
ld a,b
ret
f24add_op1_inf_nan:
ld a,h
or l
jr nz,return_exp_b
;so op1 is +inf or -inf
;If op2 is finite, then just return op1
or c
jr z,return_exp_b
inc a
add a,a
jr nz,return_exp_b
;if op2 is NaN, return NaN
ld a,d
or e
ld a,c
jr nz,f24add_op1_inf_nan_nz
;so |op1| and |op2| are inf
;if they have the same sign, fine, else return NaN
cp b
ret z
f24add_op1_inf_nan_nz:
dec hl
ret
rshift_1DE:
ld bc,0
scf
rshift_1DE_loop:
rr d
rr e
rr b
rr c
dec a
jr nz,rshift_1DE_loop
ret
f24sub:
;AHL - CDE ==> AHL
;Destroys BC,DE
;
ld b,a
ld a,c
xor $80
ld c,a
ld a,b
jp f24add
f24rsub:
;-AHL + CDE ==> AHL
;Destroys BC,DE
;
xor $80
jp f24add
f24mul:
;AHL * CDE ==> AHL
;Destroys BC,DE
;
;put the output sign in the top bit of A
ld b,a
ld a,c
and $80
xor b
;check for special values
;NaN*x ==> NaN
;0*fin ==> 0
;0*inf ==> NaN
;inf*fin ==> inf
;inf*inf ==> inf
;save A
ld b,a
and $7f
jr z,f24mul_op1_0
inc a
jp m,f24mul_op1_inf_nan
;so the first value is finite
ld a,c
and $7f
ld c,a
ret z
inc a
jp m,return_CDE
;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) + (C&7F) - 63
ld a,b
and $7f
add a,c
sub 63
jr nc,f24mul_nc
xor a ;underflowed, so return 0
ret
f24mul_nc:
cp $7f
jr c,f24mul_return_inf_carry
f24mul_return_inf:
ld a,b
or %01111111
ld hl,0 ;overflow so return inf
ret
f24mul_return_inf_carry:
xor b
and $7f
xor b
f24mul_significand:
;save the exponent
push af
;now compute (1.HL * 1.DE)
; = (1+.HL)(1+.DE)
; = 1+.HL+.DE+.HL*.DE
ld b,h
ld c,l
push hl
push de
call mul16
;result is .DEHL
;we can discard HL, but first round
xor a
sla h
ex de,hl
pop bc
adc hl,bc
adc a,0
pop bc
add hl,bc
adc a,0
rra
;now 1+A.HL is the significand
pop bc ;B is the exponent
ld a,b
ret z
ccf
rr h
rr l
inc a
inc a
add a,a
jr z,f24mul_return_inf
rra
dec a
ret
f24mul_op1_0:
ld a,c
and $7f
ret z
inc a
jp m,f24mul_return_NaN
xor a
ret
f24mul_op1_inf_nan:
ld a,h
or l
ld a,b
ret nz ;NaN
;inf*0 is NaN
ld a,c
and $7f
jr nz,f24mul_return_NaN_nz
f24mul_return_NaN:
dec a ;inf*0
ld h,a ;=
ld l,a ;NaN
ret
f24mul_return_NaN_nz:
inc a
jp m,f24mul_return_NaN_m
ld a,b ;returning inf
ret
f24mul_return_NaN_m:
;op1 is inf
;op2 is is either NaN or inf
; inf*NaN ==> NaN
; inf*inf ==> inf
;so just return op2's significand
ld a,c
ex de,hl
ret
;This was made by Runer112
;Tested by jacobly
mul16:
;BC*DE --> DEHL
; ~544.887cc as calculated in jacobly's test
;min: 214cc (DE = 1)
;max: 667cc
;avg: 544.4507883cc however, deferring to jacobly's result as mine may have math issues ?
;177 bytes
ld a,d
ld d,0
ld h,b
ld l,c
add a,a
jr c,Mul_BC_DE_DEHL_Bit14
add a,a
jr c,Mul_BC_DE_DEHL_Bit13
add a,a
jr c,Mul_BC_DE_DEHL_Bit12
add a,a
jr c,Mul_BC_DE_DEHL_Bit11
add a,a
jr c,Mul_BC_DE_DEHL_Bit10
add a,a
jr c,Mul_BC_DE_DEHL_Bit9
add a,a
jr c,Mul_BC_DE_DEHL_Bit8
add a,a
jr c,Mul_BC_DE_DEHL_Bit7
ld a,e
and %11111110
add a,a
jr c,Mul_BC_DE_DEHL_Bit6
add a,a
jr c,Mul_BC_DE_DEHL_Bit5
add a,a
jr c,Mul_BC_DE_DEHL_Bit4
add a,a
jr c,Mul_BC_DE_DEHL_Bit3
add a,a
jr c,Mul_BC_DE_DEHL_Bit2
add a,a
jr c,Mul_BC_DE_DEHL_Bit1
add a,a
jr c,Mul_BC_DE_DEHL_Bit0
rr e
ret c
ld h,d
ld l,e
ret
Mul_BC_DE_DEHL_Bit14:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit13
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit13:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit12
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit12:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit11
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit11:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit10
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit10:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit9
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit9:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit8
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit8:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit7
add hl,bc
adc a,d
Mul_BC_DE_DEHL_Bit7:
ld d,a
ld a,e
and %11111110
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit6
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit6:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit5
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit5:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit4
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit4:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit3
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit3:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit2
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit2:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit1
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit1:
add hl,hl
adc a,a
jr nc,Mul_BC_DE_DEHL_Bit0
add hl,bc
adc a,0
Mul_BC_DE_DEHL_Bit0:
add hl,hl
adc a,a
jr c,Mul_BC_DE_DEHL_FunkyCarry
rr e
ld e,a
ret nc
add hl,bc
ret nc
inc e
ret nz
inc d
ret
Mul_BC_DE_DEHL_FunkyCarry:
inc d
rr e
ld e,a
ret nc
add hl,bc
ret nc
inc e
ret
;f24inv:
ld c,a
ex de,hl
ld a,$3f
ld hl,0
f24div:
;AHL * CDE ==> AHL
;Destroys BC,DE
;
;put the output sign in B
ld b,a
xor c
add a,a
ld a,b
rla
rrca
ld b,a
;check for special values
;NaN/x ==> NaN
;0/fin ==> 0
; 0/0 ==> NaN
;inf/inf ==> NaN
;inf/x ==> inf
;x/NaN ==> NaN
;x/inf ==> 0
;x/0 ==> NaN
and $7f
jp z,f24div_0_x
inc a
jp m,f24div_infnan_x
ld a,c
and $7f
jr nz,f24div_infnan_x_nz
dec a
ld h,a
ld l,a
ret
f24div_infnan_x_nz:
inc a
jp m,f24div_x_infnan
;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) - (C&7F) + 63
res 7,c
ld a,b
and $7f
add a,63
sub c
jr nc,f24div_skip_nc
xor a ;underflowed, so return 0
ret
f24div_skip_nc:
cp $7f
jr c,f24div_inf
f24div_return_inf:
ld a,b
or %01111111
ld hl,0 ;overflow so return inf
ret
f24div_inf:
;now compute (1.HL / 1.DE)
; = (1+.HL)/(1+.DE)
; want 1.HL>1.DE, because then result is going to be 1.x
;so we can end up doing (.HL-.DE)/(1.DE) to 16 bits precision
or a
ld c,0 ;top bit of 1.HL-1.DE
sbc hl,de
jr nc,f24div_ready
;if carry is set, then DE was the larger of the two
;so we need to decrement the exponent and do
;(HL+DE)*2-DE
dec a ;decrement exponent
ret z ;return 0 if underflowed
add hl,de
add hl,hl
rl c
inc c
sbc hl,de
jr nc,f24div_ready
dec c
f24div_ready:
;C.HL is the numerator, 1.DE is the denominator
;A is the exponent, B[7] is the sign
;save the exponent and sign
push bc
push af
;we can now commence 16 iterations of this division
call fdiv24_div16
pop de
pop bc
adc a,d
jp p,f24div_return_positive
f24div_return_NaN:
dec a
ld h,a
ld l,a
f24div_return_positive:
xor b
and $7f
xor b
ret
fdiv24_div16:
;negate the divisior for more efficient division
;(16-bit addition is cheaper than subtraction)
xor a
sub e
ld e,a
ld a,0
sbc a,d
ld d,a
sbc a,a
dec a
ld b,a
ld a,c
call fdiv24_div8
rl c
push bc
call fdiv24_div8
rl c
;check if 2*A.HL>1.DE
add hl,hl
adc a,a
add hl,de
adc a,b
pop hl
ld h,l
ld l,c
ld bc,0
ld a,b
adc hl,bc
ret
fdiv24_div8:
call fdiv24_div4
fdiv24_div4:
call fdiv24_div2
fdiv24_div2:
call fdiv24_div1
fdiv24_div1:
rl c
add hl,hl
adc a,a
ret z
add hl,de
adc a,b
ret c
sbc hl,de
sbc a,b
ret
f24div_0_x:
;make sure we aren't trying 0/NaN or 0/0
ld a,c
and $7f
jr z,f24div_return_NaN
inc a
jp m,f24div_x_infnan
xor a
ret
f24div_x_infnan:
ld a,d
or e
ret z
ld a,c
ex de,hl
ret
f24div_infnan_x:
ld a,h
or l
ld a,b
ret nz
;make sure x is not inf NaN or 0
ld a,c
and $7f
jr z,f24div_return_NaN
inc a
jp m,f24div_return_NaN
ld a,b
ret
;
; ---
; +++
;
; Data and Defines
;
; ZX81 system vars
d_file: equ $400c
df_cc: equ 16398
last_k: equ 16421
margin: equ 16424
s_posn: equ 16441
frames: equ 16436
; ZX81 ROM functions
kscan: equ $02bb
findchar: equ $07bd
stop: equ $0cdc
slow: equ $0f2b
fast: equ $02e7
save: equ $02f9
printat: equ $08f5
pause: equ $0f35
cls: equ $0a2a
; ZX81 Characters (not ASCII)
_sp: equ $00
_qu: equ $0b
_lb: equ $0c
_dl: equ $0d
_cl: equ $0e
_lp: equ $10
_rp: equ $11
_gt: equ $12
_lt: equ $13
_eq: equ $14
_pl: equ $15
_mi: equ $16
_as: equ $17
_sl: equ $18
_sc: equ $19
_cm: equ $1a
_pr: equ $1b
_0_: equ $1c
_1_: equ $1d
_2_: equ $1e
_3_: equ $1f
_4_: equ $20
_5_: equ $21
_6_: equ $22
_7_: equ $23
_8_: equ $24
_9_: equ $25
_a_: equ $26
_b_: equ $27
_c_: equ $28
_d_: equ $29
_e_: equ $2a
_f_: equ $2b
_g_: equ $2c
_h_: equ $2d
_i_: equ $2e
_j_: equ $2f
_k_: equ $30
_l_: equ $31
_m_: equ $32
_n_: equ $33
_o_: equ $34
_p_: equ $35
_q_: equ $36
_r_: equ $37
_s_: equ $38
_t_: equ $39
_u_: equ $3a
_v_: equ $3b
_w_: equ $3c
_x_: equ $3d
_y_: equ $3e
_z_: equ $3f
; end defines
; ---