Converting my Mandelbrot set program to high resolution graphics using Paul Farrow’s ARX display driver.
; ; Mandelbrot Set ARX ; v1.00 Steven Reid (c) 2024 ; ARX graphics from Paul Farrow ; ; Converting my mandelbrot set to high resolution ; graphics using ARX. ; ; +++ ; ; Header and startup ; ; start up stuff org 16514 ; stored in REM at top (ZX81) jr start ; needed for z80asm ; title and copyright (will show on listing) copy: db _as,_as,_m_,_b_,_s_,_a_,_r_,_x_,_as,_as db _as,_s_,_l_,_r_,_sl,_2_,_0_,_2_,_4_,_as db _as,$76 ; **MBSARX***SLR/2024** sloop: db 0,0,0 scale: db 0,0,0 shift: db 0,0,0 x_val: db 0,0,0 y_val: db 0,0,0 cr_val: db 0,0,0 ci_val: db 0,0,0 zr_val: db 0,0,0 zi_val: db 0,0,0 zerox: equ 255 ; 256 pixels (0-255) zeroy: equ 96 ; 192 pixels / 2 (0-191) maxit: equ 15 step: equ 50 start: ; end header and startup ; ; --- ; +++ ; ; Main Loop ; ; Switch to the ARX display driver. call START_ARX_DRIVER mainloop: ; for s=0 to 750 step 50 xor a ; set scaling loop to 0 ld hl,0 ld (sloop),a ; save scaleing loop variable ld (sloop+1),hl scale_loop: ; Clear the ARX display file. ld hl,$2000 ld (hl),l ld de,$2001 ld bc,$17ff ldir ; scale = 1/(s+80) ld a,(sloop) ; load scaling loop ld hl,(sloop+1) ; .db $00,$40,$45 ;80 ld c,$45 ; load cde with 80 ld de,$4000 ;; .db $00,$40,$43 ;20 ;ld c,$43 ; load cde with 20 ;ld de,$4000 call f24add ; s+80 ld c,a ; copy result to numerator ld d,h ld e,l ; .db $00,$00,$3F ;1 ld a,$3f ; set ahl to 1 ld hl,0 call f24div ; 1/(s+80) (ahl / cde) ld (scale),a ; save result in scale ld (scale+1),hl ; shift = -1.75*s-47 ld a,(sloop) ; load scaling loop ld hl,(sloop+1) ;.db $00,$80,$BF ;-1.5 ld c,$bf ; -1.5 ld de,$8000 call f24mul ; -1.75*s ;.db $00,$78,$44 ;47 ld c,$44 ; 47 ld de,$7800 call f24sub ; -1.75*s-15 ld (shift),a ; save result in shift ld (shift+1),hl ; set width loop (x) ; .db $00,$E0,$45 ;120 ld a,$45 ; 120 ld hl,$e000 ld (x_val),a ; save result in x_val ld (x_val+1),hl ; for x = zerox to 0 step -1 ld b,zerox ; set x loop value x_loop: push bc dec b ld c,zeroy*2-1 ld a,1 ; set to plot ld (plot_action),a call plot_routine ; cr = (x+shift)*scale ld a,(shift) ; load shift into cde ld c,a ld de,(shift+1) ld a,(x_val) ; load x_val into ahl ld hl,(x_val+1) call f24add ; ahl+dce push af ld a,(scale) ; load scale ld c,a pop af ld de,(scale+1) ; load rest of scale call f24mul ; (x+shift)*scale ld (cr_val),a ; save result in cr ld (cr_val+1),hl ; set length loop (y) xor a ; set ahl to 0 ld hl,0 ld (y_val),a ; save result in y_val ld (y_val+1),hl pop bc ; for y = 0 to zeroy-1 ld c,zeroy ; note starts at zeroy as we are counting down y_loop: push bc ; see if user wants to exit (called each run) call check_break ; ci = (zeroy-y)*scale ld a,(y_val) ; load cde with y_val ld c,a ld de,(y_val+1) ; .db $00,$80,$45 ;96 ld a,$45 ; set ahl to 96 ld hl,$8000 call f24sub ; ahl-dce (96-y) push af ld a,(scale) ; load scale ld c,a pop af ld de,(scale+1) ; load rest of scale call f24mul ; (zeroy-y)*scale ld (ci_val),a ; save result in ci ld (ci_val+1),hl ; clear zr and zi xor a ld hl,0 ld (zr_val),a ; zr = 0 ld (zr_val+1),hl ld (zi_val),a ; zi = 0 ld (zi_val+1),hl ; for i = maxit to 0 step -1 ld b,maxit iteration_loop: push bc ; br = cr + zr*zr - zi*zi ld a,(zr_val) ; load zr ld hl,(zr_val+1) ld c,a ; load zr ld de,(zr_val+1) call f24mul ; zr*zr push af ; save zr*zr push hl ld a,(zi_val) ; load zi ld hl,(zi_val+1) ld c,a ; load zi ld de,(zi_val+1) call f24mul ; zi*zi ld c,a ; move zi*zi to cde ld d,h ld e,l pop hl pop af ; restore zr*zr call f24sub ; zr*zr - zi*zi ld c,a ; load cde with zr*zr - zi*zi ld d,h ld e,l ld a,(cr_val) ; load ahl with cr ld hl,(cr_val+1) call f24add ; cr + (zr*zr - zi*zi) push hl push af ; save br (will become zr) ; zi = ci + 2*zr*zi ld a,(zi_val) ; load zi ld c,a ld de,(zi_val+1) ld a,(zr_val) ; load zr ld hl,(zr_val+1) call f24mul ; zr*zi ; .db $00,$00,$40 ;2 ld c,$40 ; set cde to 2 ld de,0 call f24mul ; (2*zr*zi) ld c,a ; load cde with 2*zr*zi ld d,h ld e,l ld a,(ci_val) ; load ahl with ci ld hl,(ci_val+1) call f24add ; ci + 2*zr*zi ld (zi_val),a ; save new zi ld (zi_val+1),hl ; zr = br pop af pop hl ld (zr_val),a ; save new zr ld (zr_val+1),hl ld c,a ; load zr ld de,(zr_val+1) call f24mul ; zr*zr push af ; save zr*zr push hl ld a,(zi_val) ; load zi ld hl,(zi_val+1) ld c,a ; load zi ld de,(zi_val+1) call f24mul ; zi*zi ld c,a ; move zi*zi to cde ld d,h ld e,l pop hl pop af ; restore zr*zr call f24add ; zr*zr + zi*zi ; .db $00,$00,$41 ;4 ld c,$41 ; set cde to 4 ld de,0 call f24cmp ; compare! pop bc ; restore bc ; if > 4, then plot jp c,next_i ; ahl < cde (c) nope jp z,next_i ; ahl = cde (z) nope plot_point: ; yes, plot a point! ld a,1 ; set to plot ld (plot_action),a jr print_pixel ; and and print it ; next i next_i: dec b jp nz,iteration_loop ; passed through, so unplot xor a ; unplot point ld (plot_action),a print_pixel: pop bc ; pop x,y coordinates push bc ; push them back for later dec b ; sub x by 1 ld a,zeroy ; zeroy-y to (reverses plot) sub c ld c,a call plot_routine pop bc ; pop x,y coordinates push bc ; push them back for later dec b ; sub x by 1 ld a,zeroy-2 add a,c ld c,a call plot_routine next_y: ; y_val + 1 ld a,(y_val) ; load y_val ld hl,(y_val+1) ; .db $00,$00,$3F ;1 ld c,$3f ; set cde to 1 ld de,0 call f24add ; increment y_val ld (y_val),a ; and save it ld (y_val+1),hl pop bc dec c jp nz,y_loop ; x_val - 1 push bc ld a,(x_val) ; load x_val ld hl,(x_val+1) ; .db $00,$00,$3F ;1 ld c,$3f ; set cde to 1 ld de,0 call f24sub ; decrement x_val ld (x_val),a ; and save it ld (x_val+1),hl pop bc push bc dec b ld c,zeroy*2-1 ld a,0 ; set to plot ld (plot_action),a call plot_routine pop bc dec b jp nz,x_loop ld hl,$9000 ; longer pause call delay ; next s ; increment scaling loop ld a,(sloop) ; load scaling loop ld hl,(sloop+1) ;.db $00,$90,$43 ;25 ld c,$43 ; load step with 50 ld de,$9000 call f24add ; S+50 ld (sloop),a ; save scaleing loop variable ld (sloop+1),hl ; are we done? ; .db $80,$77,$48 ;751 ld c,$48 ; load test with 751 ld de,$7780 call f24cmp jp c,scale_loop ; not done yet! (sloop < 751) jp mainloop ; start again! ; End of loop! ; ; +++ ; ; Routines ; ; +++ ; Plot Routine ; plot_action: db 1 ; plot = 1, unplot = 0 plot_routine: ; convert coordinates from bc (yx) to de (yx) ld d,c ld e,b call PLOT ret ; end print_pixel ; --- ; ------------ ; Plot a Pixel ; ------------ ; Entry: D=Y coordinate (0-191). ; E=X coordinate (0-255). ; Preserves DE. PLOT: ld h,$01 ; The high byte of the base address of the display file divided by 32. ld l,d ; The Y coordinate, i.e. the line number. add hl,hl add hl,hl add hl,hl add hl,hl add hl,hl ; $2000 + (Line * 32). ld a,e ; The X coordinate, i.e. the column/pixel number. rrca rrca rrca ; Discard the pixel number. and $1f ; Keep the column number (0-31). or l ld l,a ; $2000 + (Line * 32) + Column. ld a,e ; The X coordinate. and $07 ; Keep the pixel number (0-7). ld b,a inc b ; B=1-8. ld a,$01 ; The pixel marker. P_LOOP: rrca ; Shift the pixel into the correct bit position. djnz P_LOOP call TRANSLATE_DFILE_ADDRESS ; Convert from a logical display file address into an ARX display file address. ld b,a ld a,(plot_action) and a ld a,b jr z,unplot_pixel plot_pixel: or (hl) ; Combine the pixel with the current display file location byte. ld (hl),a ; Write the new pixel pattern back into the display file. ret unplot_pixel: ld b,255 xor b ; Convert to a mask to remove a pixel and (hl) ; And the mask with the current display file location byte. ld (hl),a ; Write the new pixel pattern back into the display file. ret ; ------------------------------------------------------------------ ; Translate ARX Display File Address to Logical Display File Address ; ------------------------------------------------------------------ ; Entry: HL=Logical display File address. ; Exit : HL=ARX display file address. TRANSLATE_DFILE_ADDRESS: rlc l ; Reorder the line and column bits. rlc l rlc l ret ; --- ; ---------- ; ARX Driver ; ---------- ; =========================== ; Enhanced ARX Display Driver ; =========================== ; Created 13th March 2024 by Paul Farrow (comments revised 19th March 2024). ; Based on the original ARX display driver created by Andy Rea. ; You are free to use and modify this code for your own programs. ; ------------------- ; ARX High Resolution ; ------------------- ; Method of Operation ; ------------------- ; The ARX high resolution mechanism was devised by Andy Rea in 2006 and exploits User Define Graphics (UDGs) in the 8K-16K region. ; The ZX81 hardware allows the display pixel patterns to be fetched from 0K-16K region, the 0.5K region specified by the value set ; into the I register. If RAM is populated in the 8K-16K region then the I register can be pointed at it allow UDGs. This only allows ; 64 characters to be redefined, which is enough to display a different character in every position across of 2 rows. By switching ; to a different UDG character set every 2 rows, every position within the display can be populated with its own unique character ; pattern. The ARX display driver controls the process of cycling through the different UDG character sets. The RAM populated in the ; 8K-16K region of the memory map must not be WRX enabled. ; ARX Driver Enhancements ; ----------------------- ; The ARX display driver present here has a number of improvements over Andy's original ARX display driver: ; - It is robust to timing variations that can be introduced by connected devices (e.g. ZXpand) that cause vertical rotation of characters. ; - It allows faster user program operation by allowing it to run during more border lines. ; - It allows the IY and I registers to be used by the user program. ; - It outputs a VSync pulse that begins and ends aligned with the HSync pulses. ; ARX Display File Layout ; ----------------------- ; Six consecutive UDG character sets form the high resolution display file. The display file is not laid out logically such that it ; progresses across the columns of the first line before moving done to the next line, and continues through all lines in this way. ; Instead the ARX display file progresses down the 8 lines for the character in row 0 column 0, then down the 8 lines for the character ; in row 0 column 1, then the lines for row 0 column 2, etc, before then moving to the start of the next row and repeating this pattern. ; ; The display file for the ARX driver presented here is 6K in size and occupies memory locations $2000 - $37FF. ; ; An address in the ARX display file is composed as follows: ; ; 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ; | 0 | 0 | 1 | R | R | R | R | R | C | C | C | C | C | L | L | L | ; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ; ; R = Row number (0-23). ; C = Column number (0-31). ; L = Line number (0-7). ; Logical Display File Translation ; -------------------------------- ; It can often be easier for a program to operate using a logical display file layout. Converting a logical display file address to an ; address in the ARX display file can be done by rotating the low byte leftwards to reorder the column and line bits as follows: ; ; 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 ; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ; | 0 | 0 | 1 | R | R | R | R | R | L | L | L | C | C | C | C | C | ; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+ ; ; R = Row number (0-23). ; C = Column number (0-31). ; L = Line number (0-7). ; ---------------- ; Start ARX Driver ; ---------------- START_ARX_DRIVER: CALL AWAIT_NEW_FRAME ; Await the start of a new frame to ensure a flicker-free transition. LD IX,ARX_DRIVER_PICTURE ; The ARX display driver will be executed upon the next activation of the display mechanism. RET ; --------------- ; Stop ARX Driver ; --------------- STOP_ARX_DRIVER: CALL AWAIT_NEW_FRAME ; Await the start of a new frame to ensure a flicker-free transition. LD A,$1E ; Revert to the standard pixel patterns in the ROM. LD I,A LD IX,$0281 ; The standard display driver will be executed upon the next activation of the display mechanism. RET ; -------------------------- ; Await Start of a New Frame ; -------------------------- AWAIT_NEW_FRAME: LD HL,$4034 ; Point at the FRAMES system variable. LD A,(HL) ; Note the initial value of the low byte of FRAMES. ANF_LOOP: CP (HL) ; Read the current low byte of FRAMES. JR Z,ANF_LOOP ; Loop back until the value of FRAMES changes. RET ; -------------------------------------------------- ; Generate Main Picture Area And Begin Bottom Border ; -------------------------------------------------- ; Enters here after all top border lines have been generated. First perform a delay to align with the next HSync pulse. ARX_DRIVER_PICTURE: LD B,$03 ; 7 Delay to ensure the LNCTR is reset when aligned with the hardware generated HSync. ADP_DELAY: DJNZ ADP_DELAY ; 13/8=34 ; Preserve the value of the I register. LD A,I ; 9 LD E,A ; 4 Save the current value of I, which allows the user program to use it. ; Prepare registers for exiting the driver routine. LD A,($4028) ; 13 Fetch the number of bottom border lines from system variable MARGIN. DEC A ; 4 Decrement the number of bottom border lines to output to compensate for the initial delay of the VSync routine. LD D,A ; 4 Save the number of bottom border lines. LD IX,ARX_DRIVER_VSYNC ; 14 Set the display vector address to point at the VSync pulse generation routine. ; Initialise registers required by the ARX display driver. LD A,$20 ; 7 The first character set begins at $2000. LD I,A ; 9 LD BC,$0C08 ; 10 B=Number of character sets. C=Number of lines in a row. ; Force the LNCTR to reset so that characters are output starting with their top line. IN A,($FE) ; 11 Force the LNCTR to 0 aligned with the start of the hardware HSync pulse. Also sets the video output to black. ; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles. ADP_ROW1_LOOP: OUT ($FF),A ; 11 Set the video output back to white. Doubles as a delay on subsequent loop iterations. LD H,$00 ; 7 Delay to ensure the picture begins with the same left border size as the standard display. LD L,H ; 4 HL holds $0000 which ensures the CP (HL) instructions in later delays do not accidentally invoke any RAM based memory mapped devices. LD A,I ; 9 Restore the value of I into A since this will be incremented by 2 to advance to the next character set. L41C7: CALL DISPLAY_ROW1 + $8000 ; 17+(32*4)+10=155 Output a line of the row by 'executing' the echo of it. DEC C ; 4 Decrement the count of the number of lines in the row. JR Z,ADP_ROW1_DONE ; 12/7 Jump ahead if all lines of the row have been output. JP ADP_ROW1_LOOP ; 10 Loop back to output the next line of the row. ; Move on to outputting the second row of characters for the current character set. ADP_ROW1_DONE: LD C,$08 ; 7 Re-initialise with the number of lines in a row. ; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles. ADP_ROW2_LOOP: ADD HL,HL ; 11 Delay. ADD HL,HL ; 11 Delay. CP (HL) ; 7 Delay. This will read from address $0000 and so will not accidentally invoke any RAM based memory mapped devices. CALL DISPLAY_ROW2 + $8000 ; 17+(32*4)+10=155 Output a line of the row by 'executing' the echo of it. DEC C ; 4 Decrement the count of the number of lines in the row. JR Z,ADP_ROW2_DONE ; 12/7 Jump ahead if all lines of the row have been output. JR ADP_ROW2_LOOP ; 12 Loop back to output the next line of the row. ; Two rows have been output using the current character set, so now move onto the next character set. ADP_ROW2_DONE: ADD A,$02 ; 7 Advance to the next character set. LD I,A ; 9 LD C,$08 ; 7 Re-initialise with the number of lines in a row. DJNZ L41C7 ; 13/8 Loop back to render the next character set. ; All lines of the main picture area have been output. LD A,E LD I,A ; Restore the original value of I. LD A,D ; Retrieve the number of bottom border lines. JP $029E ; Enable the NMI generator and return to user program, which will be interrupted as the bottom border is being generated. ; ----------------------------------------- ; Generate VSync Pulse And Begin Top Border ; ----------------------------------------- ; Returns here after the bottom border has been generated. ARX_DRIVER_VSYNC: LD B,$09 ; 7 Delay to ensure the VSync pulse begins aligned with the end of the next hardware generated HSync. ADV_DELAY: DJNZ ADV_DELAY ; 13/8=112 PUSH IY ; 15 Save the current value of IY, which allows the user program to use it. LD IY,$4000 ; 14 Point at the start of the system variables, which is required by the VSync ROM routine. CALL $0220 ; Generate the VSync pulse and then start generating the top border lines. ; Immediately returns back to here but with the NMI generator now switched on. The IX register now holds $0281. POP IY ; Restore the original value of IY. LD IX,ARX_DRIVER_PICTURE ; Set the display vector address to point at the main picture generation routine. JP $02A2 ; Enable the NMI generator and return to the user program, which will be interrupted as the top border is being generated. ; ---------------------- ; Display Row Characters ; ---------------------- ; The two rows of characters rendered using each character set. DISPLAY_ROW1: DEFB $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $0A, $0B, $0C, $0D, $0E, $0F, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $1A, $1B, $1C, $1D, $1E, $1F RET DISPLAY_ROW2: DEFB $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $2A, $2B, $2C, $2D, $2E, $2F, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $3A, $3B, $3C, $3D, $3E, $3F RET ; ; Break ; ; preserves state, but will exit if SPACE is pushed check_break: exx ; save register states ; did the player press break key (space)? call $0f46 ; was break pressed? (break-1 ROM routine) jr nc,break ; no, exit as normal exx ; restore registers ret ; and return ; yes, exit the program as normal break: ; Switch to the standard display driver. call STOP_ARX_DRIVER rst $0008 ; call ERROR-1 reset db $ff ; with error code 0 (normal exit) ; ; Delay ; ; set bc to speed ; uses check_break to exit delay_count: dw $0000 delay: ld (delay_count),hl ; save delay call check_break ; check if done ld hl,(delay_count) ; grab what to test dec hl ; subtract 1 ld a,h ; check if done or l jr nz,delay ; not zero, keep going! ret ; pause is done! ; +++ ; Floating math ; ; these are from: https://github.com/Zeda/z80float/blob/master/f24/f24div.z80 f24cmp: ;returns the flags for float AHL minus float CDE ; AHL >= CDE, nc ; AHL < CDE, c ; AHL == CDE, z (and nc) ; ;Note: ; This allows some wiggle room in the bottom two bits. For example, if the two ; exponents are the same and the two significands differ by at most 3, they are ; considered equal. ; ;Note: ; NaN is a special case. This routines returns that NaN= 15 ;check if (B&7F) > 14 + (A&7F) ld c,a ;new exponent, need to save the sign for later comparison res 7,b and $7f add a,14 sub b jr nc,f24cmp_skip_nc xor a ret f24cmp_skip_nc: ;otherwise, not equal, so let's return the sign in c and nz ld a,c return_nz_sign_a: or $7f add a,a ret f24cmp_special: ld a,h or l ccf ret nz ;so the first op is inf ;if second of is finite, return the sign of B in carry and nz ld a,c and $7f inc a ld a,b jp p,return_nz_sign_a ;second op is either NaN or inf ld a,d or e ret nz ; op1 op2 result ; 7F 7F z, nc ; 7F FF nz,nc ; FF 7F nz,c ; FF FF z, nc ld a,c cp b ret f24add: ;AHL + CDE ==> AHL ;Destroys BC,DE ; ;save A ld b,a ;check for special values and $7f jr nz,return_CDE_skip return_CDE: ld a,c ex de,hl ret return_CDE_skip: inc a jp m,f24add_op1_inf_nan ld a,c ;check for special values and $7f jp z,return_exp_b inc a jp m,return_CDE ld a,b xor c jp m,f24add_subtract ;we need to add call f24add_reorder jr z,f24add_add_same_exp ret nc push bc call rshift_1DE sla b adc hl,de ;if carry is reset, then we are all good :) pop de ld a,d ret nc ;otherwise, we need to increment the sign and see if it overflows to inf and $7f cp $7e ld a,d jr z,f24_return_inf inc a ;we also need to shift a 0 down into the HL srl h rr l ret nc inc hl ret f24add_add_same_exp: ld a,b and $7f cp $7e ld a,b jr z,f24_return_inf inc a add hl,de rr h rr l ret nc inc l ret nz inc h ret nz inc a ret f24_return_inf: or %01111111 ld hl,0 ret f24add_subtract: call f24add_reorder jr z,f24add_subtract_same_exp ret nc push bc call rshift_1DE sub c ld c,a ld a,0 sbc a,b ld b,a sbc hl,de ;if carry is not set, then we are all good :) pop de ld a,d ret nc ;otherwise, the implicit bit is set to 0, so we need to renormalize normalize_D_HLBC: ;D is the sign+exponent ;HLBC is the significand ;returns AHLBC ;make sure HLBC is not 0 ld a,h or l or b or c ret z ld a,d normalize_D_HLBC_nonzero: ;save the sign add a,a push af rrca f24add_loop: dec a jr z,f24add_skip_ahead sla c rl b adc hl,hl jp nc,f24add_loop ;now round sla c ld bc,0 adc hl,bc ;if carry is set, then the implicit bit is 2, and the rest of the exponent is 0 ;so we can just increment A and keep HL as 0 adc a,b add a,a f24add_skip_ahead: ld d,a pop af ld a,d rra ret f24add_subtract_same_exp: ;subtract the significands ld a,b ; or a sbc hl,de ;if zero, then the result is zero, but we'll keep the same sign jr nz,f24_skip_ahead_nz and %10000000 ret f24_skip_ahead_nz: ;if the carry flag is set, then we need to change the sign of the output ;and negate the significand. if reset, then we still need to normalize and whatnot ld bc,0 jr nc,normalize_D_HLBC_nonzero xor $80 ld d,a xor a sub l ld l,a sbc a,a sub h ld h,a ld a,d jr normalize_D_HLBC_nonzero f24add_reorder: xor c rlc c rla ;Want to rearrange so that A-C>=0 sub c ret z jr nc,f24add_reorder_nc neg ;A is the difference in exponents rrc c ld b,c ex de,hl f24add_reorder_nc: ;A is how many bits to shift DE right ;B is the sign+exponent of the result or a rra cp 18 ret c return_exp_b: ld a,b ret f24add_op1_inf_nan: ld a,h or l jr nz,return_exp_b ;so op1 is +inf or -inf ;If op2 is finite, then just return op1 or c jr z,return_exp_b inc a add a,a jr nz,return_exp_b ;if op2 is NaN, return NaN ld a,d or e ld a,c jr nz,f24add_op1_inf_nan_nz ;so |op1| and |op2| are inf ;if they have the same sign, fine, else return NaN cp b ret z f24add_op1_inf_nan_nz: dec hl ret rshift_1DE: ld bc,0 scf rshift_1DE_loop: rr d rr e rr b rr c dec a jr nz,rshift_1DE_loop ret f24sub: ;AHL - CDE ==> AHL ;Destroys BC,DE ; ld b,a ld a,c xor $80 ld c,a ld a,b jp f24add f24rsub: ;-AHL + CDE ==> AHL ;Destroys BC,DE ; xor $80 jp f24add f24mul: ;AHL * CDE ==> AHL ;Destroys BC,DE ; ;put the output sign in the top bit of A ld b,a ld a,c and $80 xor b ;check for special values ;NaN*x ==> NaN ;0*fin ==> 0 ;0*inf ==> NaN ;inf*fin ==> inf ;inf*inf ==> inf ;save A ld b,a and $7f jr z,f24mul_op1_0 inc a jp m,f24mul_op1_inf_nan ;so the first value is finite ld a,c and $7f ld c,a ret z inc a jp m,return_CDE ;upper bit of B is the output sign ;first approximation of the exponent is ; (B&7F) + (C&7F) - 63 ld a,b and $7f add a,c sub 63 jr nc,f24mul_nc xor a ;underflowed, so return 0 ret f24mul_nc: cp $7f jr c,f24mul_return_inf_carry f24mul_return_inf: ld a,b or %01111111 ld hl,0 ;overflow so return inf ret f24mul_return_inf_carry: xor b and $7f xor b f24mul_significand: ;save the exponent push af ;now compute (1.HL * 1.DE) ; = (1+.HL)(1+.DE) ; = 1+.HL+.DE+.HL*.DE ld b,h ld c,l push hl push de call mul16 ;result is .DEHL ;we can discard HL, but first round xor a sla h ex de,hl pop bc adc hl,bc adc a,0 pop bc add hl,bc adc a,0 rra ;now 1+A.HL is the significand pop bc ;B is the exponent ld a,b ret z ccf rr h rr l inc a inc a add a,a jr z,f24mul_return_inf rra dec a ret f24mul_op1_0: ld a,c and $7f ret z inc a jp m,f24mul_return_NaN xor a ret f24mul_op1_inf_nan: ld a,h or l ld a,b ret nz ;NaN ;inf*0 is NaN ld a,c and $7f jr nz,f24mul_return_NaN_nz f24mul_return_NaN: dec a ;inf*0 ld h,a ;= ld l,a ;NaN ret f24mul_return_NaN_nz: inc a jp m,f24mul_return_NaN_m ld a,b ;returning inf ret f24mul_return_NaN_m: ;op1 is inf ;op2 is is either NaN or inf ; inf*NaN ==> NaN ; inf*inf ==> inf ;so just return op2's significand ld a,c ex de,hl ret ;This was made by Runer112 ;Tested by jacobly mul16: ;BC*DE --> DEHL ; ~544.887cc as calculated in jacobly's test ;min: 214cc (DE = 1) ;max: 667cc ;avg: 544.4507883cc however, deferring to jacobly's result as mine may have math issues ? ;177 bytes ld a,d ld d,0 ld h,b ld l,c add a,a jr c,Mul_BC_DE_DEHL_Bit14 add a,a jr c,Mul_BC_DE_DEHL_Bit13 add a,a jr c,Mul_BC_DE_DEHL_Bit12 add a,a jr c,Mul_BC_DE_DEHL_Bit11 add a,a jr c,Mul_BC_DE_DEHL_Bit10 add a,a jr c,Mul_BC_DE_DEHL_Bit9 add a,a jr c,Mul_BC_DE_DEHL_Bit8 add a,a jr c,Mul_BC_DE_DEHL_Bit7 ld a,e and %11111110 add a,a jr c,Mul_BC_DE_DEHL_Bit6 add a,a jr c,Mul_BC_DE_DEHL_Bit5 add a,a jr c,Mul_BC_DE_DEHL_Bit4 add a,a jr c,Mul_BC_DE_DEHL_Bit3 add a,a jr c,Mul_BC_DE_DEHL_Bit2 add a,a jr c,Mul_BC_DE_DEHL_Bit1 add a,a jr c,Mul_BC_DE_DEHL_Bit0 rr e ret c ld h,d ld l,e ret Mul_BC_DE_DEHL_Bit14: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit13 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit13: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit12 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit12: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit11 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit11: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit10 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit10: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit9 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit9: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit8 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit8: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit7 add hl,bc adc a,d Mul_BC_DE_DEHL_Bit7: ld d,a ld a,e and %11111110 add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit6 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit6: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit5 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit5: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit4 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit4: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit3 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit3: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit2 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit2: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit1 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit1: add hl,hl adc a,a jr nc,Mul_BC_DE_DEHL_Bit0 add hl,bc adc a,0 Mul_BC_DE_DEHL_Bit0: add hl,hl adc a,a jr c,Mul_BC_DE_DEHL_FunkyCarry rr e ld e,a ret nc add hl,bc ret nc inc e ret nz inc d ret Mul_BC_DE_DEHL_FunkyCarry: inc d rr e ld e,a ret nc add hl,bc ret nc inc e ret ;f24inv: ld c,a ex de,hl ld a,$3f ld hl,0 f24div: ;AHL * CDE ==> AHL ;Destroys BC,DE ; ;put the output sign in B ld b,a xor c add a,a ld a,b rla rrca ld b,a ;check for special values ;NaN/x ==> NaN ;0/fin ==> 0 ; 0/0 ==> NaN ;inf/inf ==> NaN ;inf/x ==> inf ;x/NaN ==> NaN ;x/inf ==> 0 ;x/0 ==> NaN and $7f jp z,f24div_0_x inc a jp m,f24div_infnan_x ld a,c and $7f jr nz,f24div_infnan_x_nz dec a ld h,a ld l,a ret f24div_infnan_x_nz: inc a jp m,f24div_x_infnan ;upper bit of B is the output sign ;first approximation of the exponent is ; (B&7F) - (C&7F) + 63 res 7,c ld a,b and $7f add a,63 sub c jr nc,f24div_skip_nc xor a ;underflowed, so return 0 ret f24div_skip_nc: cp $7f jr c,f24div_inf f24div_return_inf: ld a,b or %01111111 ld hl,0 ;overflow so return inf ret f24div_inf: ;now compute (1.HL / 1.DE) ; = (1+.HL)/(1+.DE) ; want 1.HL>1.DE, because then result is going to be 1.x ;so we can end up doing (.HL-.DE)/(1.DE) to 16 bits precision or a ld c,0 ;top bit of 1.HL-1.DE sbc hl,de jr nc,f24div_ready ;if carry is set, then DE was the larger of the two ;so we need to decrement the exponent and do ;(HL+DE)*2-DE dec a ;decrement exponent ret z ;return 0 if underflowed add hl,de add hl,hl rl c inc c sbc hl,de jr nc,f24div_ready dec c f24div_ready: ;C.HL is the numerator, 1.DE is the denominator ;A is the exponent, B[7] is the sign ;save the exponent and sign push bc push af ;we can now commence 16 iterations of this division call fdiv24_div16 pop de pop bc adc a,d jp p,f24div_return_positive f24div_return_NaN: dec a ld h,a ld l,a f24div_return_positive: xor b and $7f xor b ret fdiv24_div16: ;negate the divisior for more efficient division ;(16-bit addition is cheaper than subtraction) xor a sub e ld e,a ld a,0 sbc a,d ld d,a sbc a,a dec a ld b,a ld a,c call fdiv24_div8 rl c push bc call fdiv24_div8 rl c ;check if 2*A.HL>1.DE add hl,hl adc a,a add hl,de adc a,b pop hl ld h,l ld l,c ld bc,0 ld a,b adc hl,bc ret fdiv24_div8: call fdiv24_div4 fdiv24_div4: call fdiv24_div2 fdiv24_div2: call fdiv24_div1 fdiv24_div1: rl c add hl,hl adc a,a ret z add hl,de adc a,b ret c sbc hl,de sbc a,b ret f24div_0_x: ;make sure we aren't trying 0/NaN or 0/0 ld a,c and $7f jr z,f24div_return_NaN inc a jp m,f24div_x_infnan xor a ret f24div_x_infnan: ld a,d or e ret z ld a,c ex de,hl ret f24div_infnan_x: ld a,h or l ld a,b ret nz ;make sure x is not inf NaN or 0 ld a,c and $7f jr z,f24div_return_NaN inc a jp m,f24div_return_NaN ld a,b ret ; ; --- ; +++ ; ; Data and Defines ; ; ZX81 system vars d_file: equ $400c df_cc: equ 16398 last_k: equ 16421 margin: equ 16424 s_posn: equ 16441 frames: equ 16436 ; ZX81 ROM functions kscan: equ $02bb findchar: equ $07bd stop: equ $0cdc slow: equ $0f2b fast: equ $02e7 save: equ $02f9 printat: equ $08f5 pause: equ $0f35 cls: equ $0a2a ; ZX81 Characters (not ASCII) _sp: equ $00 _qu: equ $0b _lb: equ $0c _dl: equ $0d _cl: equ $0e _lp: equ $10 _rp: equ $11 _gt: equ $12 _lt: equ $13 _eq: equ $14 _pl: equ $15 _mi: equ $16 _as: equ $17 _sl: equ $18 _sc: equ $19 _cm: equ $1a _pr: equ $1b _0_: equ $1c _1_: equ $1d _2_: equ $1e _3_: equ $1f _4_: equ $20 _5_: equ $21 _6_: equ $22 _7_: equ $23 _8_: equ $24 _9_: equ $25 _a_: equ $26 _b_: equ $27 _c_: equ $28 _d_: equ $29 _e_: equ $2a _f_: equ $2b _g_: equ $2c _h_: equ $2d _i_: equ $2e _j_: equ $2f _k_: equ $30 _l_: equ $31 _m_: equ $32 _n_: equ $33 _o_: equ $34 _p_: equ $35 _q_: equ $36 _r_: equ $37 _s_: equ $38 _t_: equ $39 _u_: equ $3a _v_: equ $3b _w_: equ $3c _x_: equ $3d _y_: equ $3e _z_: equ $3f ; end defines ; ---