ZX81 Assembly Listing for mbsarx.asm

MBSARX*SLR/2024** (mbsarx.asm)

Converting my Mandelbrot set program to high resolution graphics using Paul Farrow’s ARX display driver.
;
; Mandelbrot Set ARX
; v1.00 Steven Reid (c) 2024
; ARX graphics from Paul Farrow
;
; Converting my mandelbrot set to high resolution
; graphics using ARX.
;

; +++
;
; Header and startup
;

        ; start up stuff
        ; Everything from here on is assembled at 16514 ($4082).
        org 16514               ; stored in REM at top (ZX81)
        jr start                ; needed for z80asm; hops over the title text and variables below

; title and copyright (will show on listing)
; The _xx_ names are character-code symbols defined outside this part of the
; file (presumably ZX81 character codes -- confirm against their definitions).
; The final $76 byte closes the text (presumably the ZX81 NEWLINE code -- confirm).
copy:
        db _as,_as,_m_,_b_,_s_,_a_,_r_,_x_,_as,_as
        db _as,_s_,_l_,_r_,_sl,_2_,_0_,_2_,_4_,_as
        db _as,$76  ; **MBSARX***SLR/2024**

; Working variables. Each one is a 3-byte f24 float stored the way the code
; loads and saves it: byte 0 is the sign/exponent byte (moved through A or C)
; and bytes 1-2 are the 16-bit significand (moved through HL or DE).
sloop: db 0,0,0         ; s, the zoom loop counter
scale: db 0,0,0         ; 1/(s+80)
shift: db 0,0,0         ; horizontal offset added to x before scaling
x_val: db 0,0,0         ; current x as a float (counts down by 1 per column)
y_val: db 0,0,0         ; current y as a float (counts up by 1 per row)
cr_val: db 0,0,0        ; real part of c for the current column
ci_val: db 0,0,0        ; imaginary part of c for the current row
zr_val: db 0,0,0        ; real part of z during the iteration
zi_val: db 0,0,0        ; imaginary part of z during the iteration

zerox:  equ 255         ; x loop count loaded into B (display is 256 pixels wide, 0-255)
zeroy:  equ 96          ; 192 pixels / 2        (0-191); also the y loop count loaded into C
maxit:  equ 15          ; iterations tried per point before it is treated as inside the set
step:   equ 50          ; NOTE(review): not referenced in the visible source; the s increment uses a hard-coded f24 constant

start:
; --- end of header and startup ---

; +++
;
; Main Loop
;

        ; hand the screen over to the ARX high resolution driver
        call START_ARX_DRIVER

mainloop:
        ; s = 0: restart the zoom sequence (the loop test is at the
        ; bottom of scale_loop)
        ld hl,0                 ; significand = 0
        xor a                   ; sign/exponent = 0
        ld (sloop+1),hl
        ld (sloop),a

scale_loop:

        ; wipe the ARX display file: $1800 bytes starting at $2000
        ld hl,$2000             ; first byte of the display file
        ld de,$2001             ; ldir destination = source + 1
        ld bc,$17ff             ; bytes left after the first one
        ld (hl),l               ; seed byte = 0 (L holds $00)
        ldir                    ; copy the zero forward through the rest

        ; scale = 1/(s+80)
        ld hl,(sloop+1)         ; ahl = s
        ld a,(sloop)
        ld de,$4000             ; cde = 80   (.db $00,$40,$45)
        ld c,$45                ; (an offset of 20 would be c=$43, de=$4000)
        call f24add             ; ahl = s+80

        ex de,hl                ; cde = s+80, the divisor (hl is reloaded next)
        ld c,a
        ld hl,0                 ; ahl = 1    (.db $00,$00,$3F)
        ld a,$3f
        call f24div             ; ahl = 1/(s+80)   (ahl / cde)

        ld (scale+1),hl
        ld (scale),a

        ; shift = -1.5*s - 47
        ld hl,(sloop+1)         ; ahl = s
        ld a,(sloop)
        ld de,$8000             ; cde = -1.5 (.db $00,$80,$BF)
        ld c,$bf
        call f24mul             ; ahl = -1.5*s
        ld de,$7800             ; cde = 47   (.db $00,$78,$44)
        ld c,$44
        call f24sub             ; ahl = -1.5*s - 47
        ld (shift+1),hl
        ld (shift),a

        ; x_val = 120, the float x coordinate of the first column drawn
        ld hl,$e000             ; (.db $00,$E0,$45)
        ld a,$45
        ld (x_val+1),hl
        ld (x_val),a

; for x = zerox to 1 step -1   (b is the counter; the pixel column used is b-1)
        ld b,zerox
x_loop:
        push bc                 ; keep the column counter safe

        ; progress marker: set the pixel at column b-1 on the bottom line
        ld a,1
        ld (plot_action),a      ; 1 = plot
        dec b
        ld c,zeroy*2-1          ; line 191
        call plot_routine

        ; cr = (x+shift)*scale
        ld de,(shift+1)         ; cde = shift
        ld a,(shift)
        ld c,a
        ld hl,(x_val+1)         ; ahl = x
        ld a,(x_val)
        call f24add             ; ahl = x+shift

        ld de,(scale+1)         ; cde = scale, fetched without disturbing a
        push af
        ld a,(scale)
        ld c,a
        pop af
        call f24mul             ; ahl = (x+shift)*scale

        ld (cr_val+1),hl
        ld (cr_val),a

        ; y_val = 0 for the first row of this column
        ld hl,0
        xor a
        ld (y_val+1),hl
        ld (y_val),a

        pop bc                  ; b = column counter again
        ; for y = 0 to zeroy-1   (c counts down from zeroy while y_val counts up)
        ld c,zeroy
        y_loop:
                push bc                 ; b = column counter, c = row counter

                ; give the user a chance to quit on every pixel
                call check_break

                ; ci = (96-y)*scale
                ld de,(y_val+1)         ; cde = y
                ld a,(y_val)
                ld c,a
                ld hl,$8000             ; ahl = 96   (.db $00,$80,$45)
                ld a,$45
                call f24sub             ; ahl = 96-y

                ld de,(scale+1)         ; cde = scale, fetched without disturbing a
                push af
                ld a,(scale)
                ld c,a
                pop af
                call f24mul             ; ahl = (96-y)*scale

                ld (ci_val+1),hl
                ld (ci_val),a

                ; z = 0 + 0i
                ld hl,0
                xor a
                ld (zr_val+1),hl        ; zr = 0
                ld (zr_val),a
                ld (zi_val+1),hl        ; zi = 0
                ld (zi_val),a
                
                ; for i = maxit to 1 step -1   (b counts the iterations left)
                ld b,maxit
                iteration_loop:
                        push bc                 ; park the iteration counter while the maths runs

                        ; br = cr + zr*zr - zi*zi
                        ld hl,(zr_val+1)        ; ahl = cde = zr
                        ld d,h
                        ld e,l
                        ld a,(zr_val)
                        ld c,a
                        call f24mul             ; ahl = zr*zr
                        push af                 ; stack zr*zr while zi*zi is worked out
                        push hl

                        ld hl,(zi_val+1)        ; ahl = cde = zi
                        ld d,h
                        ld e,l
                        ld a,(zi_val)
                        ld c,a
                        call f24mul             ; ahl = zi*zi

                        ex de,hl                ; cde = zi*zi (hl is refilled by the pop)
                        ld c,a
                        pop hl                  ; ahl = zr*zr
                        pop af
                        call f24sub             ; ahl = zr*zr - zi*zi

                        ex de,hl                ; cde = zr*zr - zi*zi
                        ld c,a
                        ld hl,(cr_val+1)        ; ahl = cr
                        ld a,(cr_val)
                        call f24add             ; ahl = br

                        push hl                 ; hold br back: the new zi still needs the old zr
                        push af

                        ; zi = ci + 2*zr*zi
                        ld de,(zi_val+1)        ; cde = zi
                        ld a,(zi_val)
                        ld c,a
                        ld hl,(zr_val+1)        ; ahl = zr
                        ld a,(zr_val)
                        call f24mul             ; ahl = zr*zi
                        ld de,0                 ; cde = 2    (.db $00,$00,$40)
                        ld c,$40
                        call f24mul             ; ahl = 2*zr*zi

                        ex de,hl                ; cde = 2*zr*zi
                        ld c,a
                        ld hl,(ci_val+1)        ; ahl = ci
                        ld a,(ci_val)
                        call f24add             ; ahl = ci + 2*zr*zi
                        ld (zi_val+1),hl        ; store the new zi
                        ld (zi_val),a

                        ; zr = br
                        pop af
                        pop hl
                        ld (zr_val+1),hl        ; store the new zr
                        ld (zr_val),a

                        ; escape test on the new z: zr*zr + zi*zi against 4
                        ld d,h                  ; cde = ahl = zr (still in registers)
                        ld e,l
                        ld c,a
                        call f24mul             ; ahl = zr*zr
                        push af
                        push hl

                        ld hl,(zi_val+1)        ; ahl = cde = zi
                        ld d,h
                        ld e,l
                        ld a,(zi_val)
                        ld c,a
                        call f24mul             ; ahl = zi*zi

                        ex de,hl                ; cde = zi*zi
                        ld c,a
                        pop hl                  ; ahl = zr*zr
                        pop af
                        call f24add             ; ahl = zr*zr + zi*zi

                        ld de,0                 ; cde = 4    (.db $00,$00,$41)
                        ld c,$41
                        call f24cmp             ; flags for ahl compared with 4
                        pop bc                  ; iteration counter back (pop leaves the flags alone)

                        ; only a result above 4 counts as escaped
                        jp c,next_i             ; below 4: keep iterating
                        jp z,next_i             ; equal to 4: keep iterating

                        plot_point:             ; escaped, so this pixel gets set
                                ld a,1
                                ld (plot_action),a
                                jr print_pixel

                        ; next i
                        next_i:
                        dec b
                        jp nz,iteration_loop

                        ; never escaped within maxit iterations: clear the pixel
                        xor a
                        ld (plot_action),a

                print_pixel:
                        ; upper half: line = zeroy - c, column = b - 1
                        pop bc                  ; peek at the saved column/row counters
                        push bc
                        dec b
                        ld a,zeroy
                        sub c
                        ld c,a
                        call plot_routine

                        ; mirrored copy in the lower half: line = c + zeroy-2
                        pop bc                  ; plot_routine altered b, so fetch the pair again
                        push bc
                        dec b
                        ld a,c
                        add a,zeroy-2
                        ld c,a
                        call plot_routine

        next_y:
                ; y_val = y_val + 1
                ld hl,(y_val+1)         ; ahl = y
                ld a,(y_val)
                ld de,0                 ; cde = 1    (.db $00,$00,$3F)
                ld c,$3f
                call f24add
                ld (y_val+1),hl
                ld (y_val),a

                ; one row done: count c down and go round again until it hits 0
                pop bc
                dec c
                jp nz,y_loop

        ; x_val = x_val - 1
        push bc                 ; keep the column counter across the float call
        ld hl,(x_val+1)         ; ahl = x
        ld a,(x_val)
        ld de,0                 ; cde = 1    (.db $00,$00,$3F)
        ld c,$3f
        call f24sub
        ld (x_val+1),hl
        ld (x_val),a

        ; erase the progress marker of the column just finished
        pop bc
        push bc
        ld a,0                  ; 0 = unplot
        ld (plot_action),a
        dec b                   ; column = b - 1
        ld c,zeroy*2-1          ; bottom line (191)
        call plot_routine

        ; next column until the counter reaches 0
        pop bc
        dec b
        jp nz,x_loop

        ; picture finished: hold it on screen for a while
        ld hl,$9000
        call delay

        ; next s: s = s + 25
        ld hl,(sloop+1)         ; ahl = s
        ld a,(sloop)
        ld de,$9000             ; cde = 25   (.db $00,$90,$43)
        ld c,$43
        call f24add
        ld (sloop+1),hl
        ld (sloop),a

        ; zoom again while s is below 751
        ld de,$7780             ; cde = 751  (.db $80,$77,$48)
        ld c,$48
        call f24cmp
        jp c,scale_loop         ; carry: s is still below 751

        jp mainloop             ; sequence finished, start over from s = 0

; End of loop!
;


; +++
;
; Routines
;

; +++
; Plot Routine
;
; Entry: B = x (pixel column), C = y (line); plot_action selects set or clear.
; Moves the pair into the E = x, D = y form that PLOT takes.
; Alters A, B, DE, HL and the flags (C is left alone).
;
plot_action:    db 1    ; non-zero (1) = plot, 0 = unplot

plot_routine:
        ld e,b                  ; E = x
        ld d,c                  ; D = y
        jp PLOT                 ; PLOT's own ret goes back to our caller

; end print_pixel
; ---

; ------------------------
; Plot or Unplot a Pixel
; ------------------------
; Entry: D=Y coordinate (0-191).
;        E=X coordinate (0-255).
;        (plot_action) = 0 to clear the pixel, anything else to set it.
; Preserves DE. Alters A, B, HL and the flags.
PLOT:
        ; HL = $2000 + (line * 32), built by shifting $01:Y left five times
        ld l,d
        ld h,$01
        add hl,hl
        add hl,hl
        add hl,hl
        add hl,hl
        add hl,hl

        ; merge in the byte column (x / 8)
        ld a,e
        and $f8                                 ; drop the pixel number ...
        rrca
        rrca
        rrca                                    ; ... leaving the column (0-31)
        or l
        ld l,a                                  ; HL = $2000 + (line * 32) + column

        ; build the bit mask: pixel 0 is bit 7, pixel 7 is bit 0
        ld a,e
        and $07                                 ; pixel number within the byte (0-7)
        inc a
        ld b,a                                  ; B = rotate count (1-8)
        ld a,$01

P_LOOP: rrca
        djnz P_LOOP

        call TRANSLATE_DFILE_ADDRESS            ; logical address -> ARX display file address

        ; pick the action without losing the mask
        ld b,a
        ld a,(plot_action)
        or a
        ld a,b
        jr z,unplot_pixel

        plot_pixel:
        or (hl)                                 ; switch the pixel on
        ld (hl),a
        ret

        unplot_pixel:
        ld b,255
        xor b                                   ; invert the mask
        and (hl)                                ; switch the pixel off
        ld (hl),a
        ret


; ------------------------------------------------------------------
; Translate Logical Display File Address to ARX Display File Address
; ------------------------------------------------------------------
; Entry: HL=Logical display File address.
; Exit : HL=ARX display file address.
; Only L and the flags are changed; PLOT relies on A surviving this call.
TRANSLATE_DFILE_ADDRESS:
        rlc  l                                  ; Reorder the line and column bits.
        rlc  l                                  ; Three left rotations of the low byte turn LLLCCCCC into CCCCCLLL.
        rlc  l
        ret
; ---

; ----------
; ARX Driver
; ----------
; ===========================
; Enhanced ARX Display Driver
; ===========================
; Created 13th March 2024 by Paul Farrow (comments revised 19th March 2024).
; Based on the original ARX display driver created by Andy Rea.
; You are free to use and modify this code for your own programs.

; -------------------
; ARX High Resolution
; -------------------

; Method of Operation
; -------------------
; The ARX high resolution mechanism was devised by Andy Rea in 2006 and exploits User Defined Graphics (UDGs) in the 8K-16K region.
; The ZX81 hardware allows the display pixel patterns to be fetched from the 0K-16K region, from the 0.5K block selected by the value set
; in the I register. If RAM is populated in the 8K-16K region then the I register can be pointed at it to allow UDGs. This only allows
; 64 characters to be redefined, which is enough to display a different character in every position across 2 rows. By switching
; to a different UDG character set every 2 rows, every position within the display can be populated with its own unique character
; pattern. The ARX display driver controls the process of cycling through the different UDG character sets. The RAM populated in the
; 8K-16K region of the memory map must not be WRX enabled.

; ARX Driver Enhancements
; -----------------------
; The ARX display driver present here has a number of improvements over Andy's original ARX display driver:
; - It is robust to timing variations that can be introduced by connected devices (e.g. ZXpand) that cause vertical rotation of characters.
; - It allows faster user program operation by allowing it to run during more border lines.
; - It allows the IY and I registers to be used by the user program.
; - It outputs a VSync pulse that begins and ends aligned with the HSync pulses.

; ARX Display File Layout
; -----------------------
; Twelve consecutive 0.5K UDG character sets form the high resolution display file. The display file is not laid out logically such that it
; progresses across the columns of the first line before moving down to the next line, and continues through all lines in this way.
; Instead the ARX display file progresses down the 8 lines for the character in row 0 column 0, then down the 8 lines for the character
; in row 0 column 1, then the lines for row 0 column 2, etc, before then moving to the start of the next row and repeating this pattern.
;
; The display file for the ARX driver presented here is 6K in size and occupies memory locations $2000 - $37FF.
;
; An address in the ARX display file is composed as follows:
;
;  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0 
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
; | 0 | 0 | 1 | R | R | R | R | R | C | C | C | C | C | L | L | L |
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
;
; R = Row number (0-23).
; C = Column number (0-31).
; L = Line number (0-7).

; Logical Display File Translation
; --------------------------------
; It can often be easier for a program to operate using a logical display file layout. Converting a logical display file address to an
; address in the ARX display file can be done by rotating the low byte leftwards to reorder the column and line bits as follows:
;
;  15  14  13  12  11  10   9   8   7   6   5   4   3   2   1   0 
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
; | 0 | 0 | 1 | R | R | R | R | R | L | L | L | C | C | C | C | C |
; +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+
;
; R = Row number (0-23).
; C = Column number (0-31).
; L = Line number (0-7).

; ----------------
; Start ARX Driver
; ----------------
; Waits for FRAMES to change, then points IX at the ARX picture routine.
; Alters A, HL and the flags (through AWAIT_NEW_FRAME) and IX.

START_ARX_DRIVER:
        CALL AWAIT_NEW_FRAME                    ; Await the start of a new frame to ensure a flicker-free transition.

        LD   IX,ARX_DRIVER_PICTURE              ; The ARX display driver will be executed upon the next activation of the display mechanism.
        RET

; ---------------
; Stop ARX Driver
; ---------------
; Waits for FRAMES to change, then restores I and IX to the values used by the standard display.
; Alters A, HL, the flags, I and IX.

STOP_ARX_DRIVER:
        CALL AWAIT_NEW_FRAME                    ; Await the start of a new frame to ensure a flicker-free transition.

        LD   A,$1E                              ; Revert to the standard pixel patterns in the ROM.
        LD   I,A
        LD   IX,$0281                           ; The standard display driver will be executed upon the next activation of the display mechanism.
        RET

; --------------------------
; Await Start of a New Frame
; --------------------------
; Busy-waits until the low byte of FRAMES differs from the value it held on entry.
; Exit: HL=$4034, A=the low byte of FRAMES as read on entry.
; The loop has no timeout, so it only returns once FRAMES changes.

AWAIT_NEW_FRAME:
        LD   HL,$4034                           ; Point at the FRAMES system variable.
        LD   A,(HL)                             ; Note the initial value of the low byte of FRAMES.

ANF_LOOP:
        CP   (HL)                               ; Read the current low byte of FRAMES.
        JR   Z,ANF_LOOP                         ; Loop back until the value of FRAMES changes.
        
        RET

; --------------------------------------------------
; Generate Main Picture Area And Begin Bottom Border
; --------------------------------------------------
; Enters here after all top border lines have been generated. First perform a delay to align with the next HSync pulse.
;
; The numbers at the start of each comment are T-cycle counts; the instruction sequence is timing critical and must not be reordered.
;
; Register use within this routine:
;   B = character sets still to render (starts at 12), C = lines left in the current row (8 per row),
;   D = bottom border line count (MARGIN - 1), E = the I register value found on entry, HL = $0000 for the delay reads.

ARX_DRIVER_PICTURE:
        LD   B,$03                              ; 7                     Delay to ensure the LNCTR is reset when aligned with the hardware generated HSync.

ADP_DELAY:
        DJNZ ADP_DELAY                          ; 13/8=34

; Preserve the value of the I register.

        LD   A,I                                ; 9
        LD   E,A                                ; 4                     Save the current value of I, which allows the user program to use it.

; Prepare registers for exiting the driver routine.

        LD   A,($4028)			        ; 13                    Fetch the number of bottom border lines from system variable MARGIN.
        DEC  A                                  ; 4                     Decrement the number of bottom border lines to output to compensate for the initial delay of the VSync routine.
        LD   D,A                                ; 4                     Save the number of bottom border lines.
       
        LD   IX,ARX_DRIVER_VSYNC                ; 14                    Set the display vector address to point at the VSync pulse generation routine.

; Initialise registers required by the ARX display driver.

        LD   A,$20                              ; 7                     The first character set begins at $2000.
        LD   I,A                                ; 9

        LD   BC,$0C08                           ; 10                    B=Number of character sets. C=Number of lines in a row.

; Force the LNCTR to reset so that characters are output starting with their top line.

        IN   A,($FE)                            ; 11                    Force the LNCTR to 0 aligned with the start of the hardware HSync pulse. Also sets the video output to black.

; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles.
; (11 + 7 + 4 + 9 + 155 + 4 + 7 + 10 = 207 on the looping path.)

ADP_ROW1_LOOP:
        OUT  ($FF),A                            ; 11                    Set the video output back to white. Doubles as a delay on subsequent loop iterations.

        LD   H,$00                              ; 7                     Delay to ensure the picture begins with the same left border size as the standard display.
        LD   L,H                                ; 4                     HL holds $0000 which ensures the CP (HL) instructions in later delays do not accidentally invoke any RAM based memory mapped devices.

        LD   A,I                                ; 9                     Restore the value of I into A since this will be incremented by 2 to advance to the next character set.

L41C7:  CALL DISPLAY_ROW1 + $8000               ; 17+(32*4)+10=155      Output a line of the row by 'executing' the echo of it.
        DEC  C                                  ; 4                     Decrement the count of the number of lines in the row.
        JR   Z,ADP_ROW1_DONE                    ; 12/7                  Jump ahead if all lines of the row have been output.

        JP   ADP_ROW1_LOOP                      ; 10                    Loop back to output the next line of the row.

; Move on to outputting the second row of characters for the current character set.

ADP_ROW1_DONE:
        LD  C,$08                               ; 7                     Re-initialise with the number of lines in a row.

; Enter a loop to output the 8 lines of a row of characters. Each iteration takes 207 T-cycles.
; (11 + 11 + 7 + 155 + 4 + 7 + 12 = 207 on the looping path.)

ADP_ROW2_LOOP:
        ADD HL,HL                               ; 11                    Delay.
        ADD HL,HL                               ; 11                    Delay.
        CP  (HL)                                ; 7                     Delay. This will read from address $0000 and so will not accidentally invoke any RAM based memory mapped devices.

        CALL DISPLAY_ROW2 + $8000               ; 17+(32*4)+10=155      Output a line of the row by 'executing' the echo of it.
        DEC  C                                  ; 4                     Decrement the count of the number of lines in the row.
        JR   Z,ADP_ROW2_DONE                    ; 12/7                  Jump ahead if all lines of the row have been output.

        JR   ADP_ROW2_LOOP                      ; 12                    Loop back to output the next line of the row.

; Two rows have been output using the current character set, so now move onto the next character set.

ADP_ROW2_DONE:
        ADD  A,$02                              ; 7                     Advance to the next character set.
        LD   I,A                                ; 9

        LD   C,$08                              ; 7                     Re-initialise with the number of lines in a row.
        DJNZ L41C7                              ; 13/8                  Loop back to render the next character set. Re-enters the row 1 loop at its CALL instruction.

; All lines of the main picture area have been output.

        LD   A,E
        LD   I,A                                ;                       Restore the original value of I.

        LD   A,D                                ;                       Retrieve the number of bottom border lines.
        JP   $029E				;                       Enable the NMI generator and return to user program, which will be interrupted as the bottom border is being generated.

; -----------------------------------------
; Generate VSync Pulse And Begin Top Border
; -----------------------------------------
; Returns here after the bottom border has been generated.
; ARX_DRIVER_PICTURE points IX at this routine, and this routine points IX back at ARX_DRIVER_PICTURE before leaving,
; so the two alternate. The opening delay is timing critical and must not be changed.

ARX_DRIVER_VSYNC:
        LD   B,$09                              ; 7                     Delay to ensure the VSync pulse begins aligned with the end of the next hardware generated HSync.
        
ADV_DELAY:
        DJNZ ADV_DELAY                          ; 13/8=112
        
        PUSH IY                                 ; 15                    Save the current value of IY, which allows the user program to use it.

        LD   IY,$4000                           ; 14                    Point at the start of the system variables, which is required by the VSync ROM routine.
        CALL $0220				;                       Generate the VSync pulse and then start generating the top border lines.

; Immediately returns back to here but with the NMI generator now switched on. The IX register now holds $0281.

        POP  IY                                 ;                       Restore the original value of IY.

        LD   IX,ARX_DRIVER_PICTURE              ;                       Set the display vector address to point at the main picture generation routine.
        JP   $02A2				;                       Enable the NMI generator and return to the user program, which will be interrupted as the top border is being generated.

; ----------------------
; Display Row Characters
; ----------------------
; The two rows of characters rendered using each character set.
; Each row is 32 character codes followed by a RET. ARX_DRIVER_PICTURE calls these rows at their address + $8000
; (the echo of this memory), once for every display line.

; Row 1 uses character codes $00-$1F, one per column.
DISPLAY_ROW1:
        DEFB $00, $01, $02, $03, $04, $05, $06, $07, $08, $09, $0A, $0B, $0C, $0D, $0E, $0F, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $1A, $1B, $1C, $1D, $1E, $1F
        RET

; Row 2 uses character codes $20-$3F, one per column.
DISPLAY_ROW2:
        DEFB $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $2A, $2B, $2C, $2D, $2E, $2F, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $3A, $3B, $3C, $3D, $3E, $3F
        RET

;
; Break
;
; Polls the ROM break test and returns when no break is requested.
; BC, DE and HL are parked in the alternate register set around the ROM
; call; A and the flags are not saved.
; If the ROM routine comes back with carry clear, the ARX driver is shut
; down and the program leaves through RST $08 -- that path never returns.

check_break:
        exx                     ; move BC/DE/HL out of the way

        call $0f46              ; ROM break-1 test (SPACE key)
        jr c,check_break_done   ; carry set: no break, carry on running

        ; carry clear: break requested, exit the program
break:
        call STOP_ARX_DRIVER    ; back to the standard display driver

        rst $0008               ; ERROR-1 restart
        db $ff                  ; with error code 0 (normal exit)

check_break_done:
        exx                     ; caller's BC/DE/HL back in place
        ret

;
; Delay
;
; Entry: HL = number of passes to wait (0 gives 65536 passes)
; Every pass calls check_break, so the user can quit during the pause.
; Alters A, HL, the flags and the delay_count word.

delay_count: dw $0000           ; passes still to go
delay:
        ld (delay_count),hl     ; park the counter across the break check

        call check_break

        ld hl,(delay_count)     ; counter back
        dec hl                  ; one pass fewer (16-bit dec sets no flags)
        ld a,l                  ; so test h|l by hand
        or h
        jr nz,delay             ; more to go: store it and go round again

        ret                     ; counter reached zero

; +++
; Floating math
;
; these are from: https://github.com/Zeda/z80float/blob/master/f24/f24div.z80

f24cmp:
;returns the flags for float AHL minus float CDE
;   AHL >= CDE, nc
;   AHL < CDE,  c
;   AHL == CDE, z (and nc)
;
;Entry: AHL = first operand, CDE = second operand
;Exit:  flags as above
;Destroys A,BC,DE and (on the finite path, through f24sub) HL
;
;Note:
;   This allows some wiggle room in the bottom two bits. For example, if the two
;   exponents are the same and the two significands differ by at most 3, they are
;   considered equal.
;
;Note:
;   NaN is a special case. This routine reports NaN as less than every x
;   (carry set, nz), including another NaN.
;

;check for inf and NaN (exponent field of $7f)
  ld b,a
  and $7f
  inc a
  jp m,f24cmp_special
  ld a,b

;save the old sign+exponent, then compute AHL - CDE
  push af
  call f24sub

;restore the old sign+exponent into B
  pop bc

;if the difference is -0 or +0, return equal (z, and xor leaves nc)
  xor $80
  ret z
  xor $80
  ret z

;if the difference was only in the bottom two bits, we'll call it good
;that is the case when the exponent of the difference is at least 15 below
;the exponent of the first operand:
;check if (B&7F) is greater than 14 + (A&7F)
  ld c,a    ;new exponent, need to save the sign for later comparison
  res 7,b
  and $7f
  add a,14
  sub b
  jr nc,f24cmp_skip_nc
  xor a     ;close enough: report equal (z, nc)
  ret

f24cmp_skip_nc:
;otherwise, not equal, so let's return the sign in c and nz
  ld a,c
return_nz_sign_a:
  or $7f    ;force a non-zero result ...
  add a,a   ;... and shift the sign bit out into carry
  ret

f24cmp_special:
;first operand is inf or NaN; a non-zero significand means NaN
  ld a,h
  or l
  ccf       ;or cleared carry, so this sets it: NaN reports "less than"
  ret nz

;so the first op is inf

;if second op is finite, return the sign of B in carry and nz

  ld a,c
  and $7f
  inc a
  ld a,b
  jp p,return_nz_sign_a

;second op is either NaN or inf
  ld a,d
  or e
  ret nz

; op1 op2 result
; 7F  7F  z, nc
; 7F  FF  nz,nc
; FF  7F  nz,c
; FF  FF  z, nc
  ld a,c
  cp b
  ret

f24add:
;AHL + CDE ==> AHL
;Destroys BC,DE
;
;Each operand is a sign+exponent byte (A or C) and a 16-bit significand (HL or DE).
;An exponent field of 0 is treated as zero and an exponent field of $7f as inf/NaN.
;
;save A
  ld b,a

;check for special values
  and $7f
  jr nz,return_CDE_skip
;op1 is zero, so the answer is op2
return_CDE:
  ld a,c
  ex de,hl
  ret
return_CDE_skip:
  inc a
  jp m,f24add_op1_inf_nan   ;exponent field was $7f

  ld a,c
;check for special values
  and $7f
  jp z,return_exp_b         ;op2 is zero, so the answer is op1

  inc a
  jp m,return_CDE           ;op2 is inf/NaN, so return it


  ld a,b
  xor c
  jp m,f24add_subtract      ;signs differ
;we need to add

  call f24add_reorder
  jr z,f24add_add_same_exp
  ret nc                    ;exponents too far apart: the larger operand is the result
  push bc
  call rshift_1DE           ;line the smaller significand up with the larger one
  sla b                     ;top bit shifted out of DE becomes the rounding carry
  adc hl,de
  ;if carry is reset, then we are all good :)
  pop de
  ld a,d
  ret nc
;otherwise, we need to increment the exponent and see if it overflows to inf
  and $7f
  cp $7e
  ld a,d
  jr z,f24_return_inf
  inc a

;we also need to shift a 0 down into the HL
  srl h
  rr l
  ret nc
  inc hl                    ;round up on the bit that was shifted out
  ret

f24add_add_same_exp:
;both significands have the same weight, so the sum always needs one more exponent step
  ld a,b
  and $7f
  cp $7e
  ld a,b
  jr z,f24_return_inf
  inc a
  add hl,de
  rr h                      ;halve the sum, carry from the add comes in at the top
  rr l
  ret nc
  inc l                     ;round up, rippling into H and the exponent if needed
  ret nz
  inc h
  ret nz
  inc a
  ret

f24_return_inf:
;A holds the sign; force the exponent field to $7f with a zero significand
  or %01111111
  ld hl,0
  ret


f24add_subtract:
;Reached from f24add when the operand signs differ: subtract the magnitudes.
  call f24add_reorder
  jr z,f24add_subtract_same_exp
  ret nc                    ;exponents too far apart: the larger operand is the result
  push bc
  call rshift_1DE           ;returns with A=0 and the shifted-out bits in BC
  sub c                     ;HL:BC = HL:0000 - DE:BC, low byte first
  ld c,a
  ld a,0
  sbc a,b
  ld b,a
  sbc hl,de
  ;if carry is not set, then we are all good :)
  pop de
  ld a,d
  ret nc

  ;otherwise, the implicit bit is set to 0, so we need to renormalize
normalize_D_HLBC:
;D is the sign+exponent
;HLBC is the significand
;returns AHLBC
  ;make sure HLBC is not 0
  ld a,h
  or l
  or b
  or c
  ret z

  ld a,d
normalize_D_HLBC_nonzero:
  ;save the sign
  add a,a                   ;sign bit goes to carry
  push af
  rrca                      ;A = exponent field again (top bit is now clear)

f24add_loop:
;shift HLBC left, lowering the exponent each time, until a 1 falls out of the top
  dec a
  jr z,f24add_skip_ahead    ;exponent ran out
  sla c
  rl b
  adc hl,hl
  jp nc,f24add_loop
  ;now round
  sla c
  ld bc,0
  adc hl,bc
  ;if carry is set, then the implicit bit is 2, and the rest of the exponent is 0
  ;so we can just increment A and keep HL as 0
  adc a,b
  add a,a
f24add_skip_ahead:
  ld d,a
  pop af                    ;carry = saved sign
  ld a,d
  rra                       ;sign back into the top bit of A
  ret

f24add_subtract_same_exp:
;subtract the significands
;(carry is clear on arrival: the SUB in f24add_reorder produced zero, so no borrow)
  ld a,b
;  or a
  sbc hl,de

;if zero, then the result is zero, but we'll keep the same sign
  jr nz,f24_skip_ahead_nz
  and %10000000
  ret

f24_skip_ahead_nz:
  ;if the carry flag is set, then we need to change the sign of the output
  ;and negate the significand. if reset, then we still need to normalize and whatnot
  ld bc,0
  jr nc,normalize_D_HLBC_nonzero
  xor $80
  ld d,a
  xor a                     ;HL = 0 - HL
  sub l
  ld l,a
  sbc a,a
  sub h
  ld h,a
  ld a,d
  jr normalize_D_HLBC_nonzero

f24add_reorder:
;Orders the two operands of f24add so that AHL is the one with the larger exponent.
;Entry: A = B xor C (as left by f24add), B = op1 sign+exponent, C = op2 sign+exponent
;Exit:  z      - exponents are equal
;       nz, c  - A = number of bits to shift DE right (below 18), B = sign+exponent of the result
;       nz, nc - exponents differ by 18 or more; A = B and HL hold the larger operand
;C is left rotated unless the operands were swapped.
  xor c                     ;A = B again
  rlc c                     ;move both sign bits to bit 0 so that the
  rla                       ;subtraction compares the exponents only
;Want to rearrange so that A-C>=0
  sub c
  ret z
  jr nc,f24add_reorder_nc
  neg
  ;A is the difference in exponents
  rrc c
  ld b,c
  ex de,hl
f24add_reorder_nc:
;A is how many bits to shift DE right
;B is the sign+exponent of the result
  or a
  rra                       ;undo the doubling from the rotate
  cp 18
  ret c
return_exp_b:
;shared exit: hand back B as the sign+exponent with HL untouched
  ld a,b
  ret

f24add_op1_inf_nan:
;Reached from f24add when op1 has an exponent field of $7f.
;B = op1 sign+exponent, HL = op1 significand, CDE = op2.
  ld a,h
  or l
  jr nz,return_exp_b        ;non-zero significand: op1 is NaN, return it
;so op1 is +inf or -inf
;If op2 is finite, then just return op1
  or c                      ;A was 0, so this loads C and tests it
  jr z,return_exp_b
  inc a
  add a,a                   ;zero only when C was $7f or $ff
  jr nz,return_exp_b

;if op2 is NaN, return NaN
  ld a,d
  or e
  ld a,c
  jr nz,f24add_op1_inf_nan_nz

;so |op1| and |op2| are inf
;if they have the same sign, fine, else return NaN
  cp b
  ret z
f24add_op1_inf_nan_nz:
  dec hl                    ;HL was 0, so this gives the non-zero significand $ffff
  ret

rshift_1DE:
;Shift the significand 1.DE (implicit leading 1 included) right by A bits.
;In:  A  = number of bits to shift; must be nonzero, because A=0 would
;          run the loop 256 times
;     DE = significand without the implicit bit
;Out: DE = shifted value, with the implicit 1 shifted in on the first pass
;     BC = the bits that fell off the bottom of DE (B high, C low)
;     A  = 0
  ld bc,0
  scf                   ;the implicit 1 enters D on the first pass
rshift_1DE_loop:
  rr d
  rr e
  rr b
  rr c
  dec a                 ;dec leaves carry alone: the bit leaving C
                        ;feeds `rr d` on the next pass
  jr nz,rshift_1DE_loop
  ret


f24sub:
;AHL - CDE ==> AHL
;Implemented as AHL + (-CDE): toggle the sign bit of op2, then tail-call
;f24add.
;Destroys BC,DE
;
  ld b,a          ;hold op1's sign+exponent while A is borrowed
  ld a,$80
  xor c           ;A = C with bit 7 (the sign) toggled
  ld c,a
  ld a,b          ;op1's sign+exponent back in A
  jp f24add       ;f24add's ret returns to our caller

f24rsub:
;-AHL + CDE ==> AHL
;Reverse subtract: toggle the sign bit of op1, then tail-call f24add.
;Destroys BC,DE
;
  xor $80         ;flip bit 7 (the sign) of op1
  jp f24add       ;f24add's ret returns to our caller

f24mul:
;AHL * CDE ==> AHL
;Destroys BC,DE
;
;In:  A = sign (bit 7) + exponent of op1, HL = op1 significand
;     C = sign (bit 7) + exponent of op2, DE = op2 significand
;Out: AHL = product
;An exponent field of 0 is zero; $7F is inf (significand 0) or NaN
;(significand nonzero). Zero results are returned as A=0.
;Infinite results always carry the product sign (op1 sign XOR op2 sign),
;whichever operand was the infinite one.

;put the output sign in the top bit of A
  ld b,a
  ld a,c
  and $80
  xor b

;check for special values
;NaN*x ==> NaN
;0*fin ==> 0
;0*inf ==> NaN
;inf*fin ==> inf
;inf*inf ==> inf

;save A
  ld b,a                    ;B = output sign + op1 exponent
  and $7f
  jr z,f24mul_op1_0
  inc a
  jp m,f24mul_op1_inf_nan   ;$7F+1 = $80 sets the sign flag

;so the first value is finite
  ld a,c
  and $7f
  ld c,a                    ;C = op2 exponent, sign stripped
  ret z                     ;fin*0 ==> 0 (A=0)
  inc a
  ;fin*inf and fin*NaN: return op2's significand with the output sign
  ;held in B (C no longer has a sign to return)
  jp m,f24mul_return_NaN_m

;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) + (C&7F) - 63
  ld a,b
  and $7f
  add a,c
  sub 63
  jr nc,f24mul_nc
  xor a     ;underflowed, so return 0
  ret
f24mul_nc:
  cp $7f
  jr c,f24mul_return_inf_carry  ;exponent below $7F: no overflow
f24mul_return_inf:
  ld a,b
  or %01111111
  ld hl,0   ;overflow so return inf
  ret
f24mul_return_inf_carry:
;despite the name, this is the normal (no overflow) path

  ;merge: bits 0-6 from A (exponent), bit 7 from B (sign)
  xor b
  and $7f
  xor b
f24mul_significand:
;save the exponent
  push af

;now compute (1.HL * 1.DE)
; = (1+.HL)(1+.DE)
; = 1+.HL+.DE+.HL*.DE
  ld b,h
  ld c,l
  push hl
  push de
  call mul16
  ;result is .DEHL
  ;we can discard HL, but first round
  xor a
  sla h                     ;carry = top bit of the discarded word
  ex de,hl
  pop bc                    ;BC = original DE
  adc hl,bc                 ;+ .DE + rounding bit
  adc a,0
  pop bc                    ;BC = original HL
  add hl,bc                 ;+ .HL
  adc a,0                   ;A = 0..2 carries; Z set when there were none
  rra                       ;carry = low bit of the count (Z unchanged)
;now 1+A.HL is the significand
  pop bc    ;B is the exponent
  ld a,b
  ret z                     ;no carries: significand is already 1.HL
  ;the significand is 2.HL or 3.HL: halve it and bump the exponent
  ccf                       ;carry = low bit of the integer part
  rr h
  rr l
  inc a
  inc a
  add a,a                   ;drop the sign; zero means exponent+1 = $7F
  jr z,f24mul_return_inf
  rra                       ;sign back in, A = sign + exponent+2
  dec a                     ;net effect: exponent+1
  ret

f24mul_op1_0:
;op1 is zero
  ld a,c
  and $7f
  ret z                     ;0*0 ==> 0
  inc a
  jp m,f24mul_return_NaN    ;0*inf and 0*NaN ==> NaN (A=$80 here)
  xor a                     ;0*fin ==> 0
  ret

f24mul_op1_inf_nan:
  ld a,h
  or l
  ld a,b
  ret nz    ;NaN

;inf*0 is NaN
  ld a,c
  and $7f
  jr nz,f24mul_return_NaN_nz
f24mul_return_NaN:
  dec a   ;inf*0
  ld h,a  ;=
  ld l,a  ;NaN
  ret
f24mul_return_NaN_nz:
  inc a
  jp m,f24mul_return_NaN_m
  ld a,b    ;returning inf
  ret
f24mul_return_NaN_m:

;op2 is either NaN or inf; op1 is inf or finite
; fin*inf ==> inf
; fin*NaN ==> NaN
; inf*NaN ==> NaN
; inf*inf ==> inf
;so return op2's significand with exponent $7F. The sign comes from B
;(op1 sign XOR op2 sign) rather than from C, so -inf*-inf is +inf and
;-inf*+inf is -inf.
  ld a,b
  or %01111111
  ex de,hl
  ret

;This was made by Runer112
;Tested by jacobly
mul16:
;BC*DE --> DEHL
;16x16 -> 32 bit multiply, unrolled shift-and-add.
;In:  BC = multiplicand (never written here), DE = multiplier
;Out: DEHL = product
;Destroys A
; ~544.887cc as calculated in jacobly's test
;min: 214cc  (DE = 1)
;max: 667cc
;avg: 544.4507883cc   however, deferring to jacobly's result as mine may have math issues ?
;177 bytes
	ld	a,d
	ld	d,0		; D=0 so `adc a,d` below adds only the carry
	ld	h,b
	ld	l,c
	; look for the highest set bit of the multiplier and enter the
	; unrolled chain at the matching step
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit14
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit13
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit12
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit11
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit10
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit9
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit8
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit7
	; upper byte of the multiplier was 0: continue with bits 7..1 of E
	; (bit 0 is handled separately at the end)
	ld	a,e
 	and	%11111110
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit6
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit5
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit4
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit3
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit2
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit1
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit0
	; only bit 0 of the multiplier can still be set
	rr	e		; E bit 0 -> carry, E becomes 0
	ret	c		; multiplier was 1: HL=BC, DE=0
	ld	h,d		; multiplier was 0: HL=0
	ld	l,e
	ret

; Each BitN step: shift the running product left (HL, with A taking the
; overflow in its low bits while its high bits still hold unread multiplier
; bits); if the multiplier bit shifted out of A is 1, add BC.
Mul_BC_DE_DEHL_Bit14:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit13
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit13:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit12
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit12:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit11
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit11:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit10
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit10:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit9
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit9:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit8
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit8:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit7
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit7:
	; upper multiplier byte consumed: park the accumulated byte in D
	; and carry on with bits 7..1 of E
	ld	d,a
	ld	a,e
	and	%11111110
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit6
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit6:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit5
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit5:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit4
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit4:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit3
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit3:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit2
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit2:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit1
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit1:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit0
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit0:
	add	hl,hl
	adc	a,a
	jr	c,Mul_BC_DE_DEHL_FunkyCarry	; A overflowed into D
	rr	e		; multiplier bit 0 -> carry
	ld	e,a		; E = third byte of the product
	ret	nc
	add	hl,bc		; bit 0 was set: add BC one last time
	ret	nc
	inc	e		; ripple the carry up through E and D
	ret	nz
	inc	d
	ret

Mul_BC_DE_DEHL_FunkyCarry:
; same ending as above, taken when the final `adc a,a` carried out of A
	inc	d
	rr	e
	ld	e,a
	ret	nc
	add	hl,bc
	ret	nc
	inc	e
	ret


;f24inv:
;The f24inv entry point is disabled (its label is commented out), and the
;code before it ends in an unconditional ret, so the next four
;instructions are not reached by fall-through. With the label restored
;they would compute 1/AHL: move AHL into CDE and load AHL with 1.0.
  ld c,a
  ex de,hl
  ld a,$3f
  ld hl,0
f24div:
;AHL / CDE ==> AHL
;Destroys BC,DE
;
;In:  A = sign (bit 7) + exponent of the dividend, HL = its significand
;     C = sign (bit 7) + exponent of the divisor,  DE = its significand
;Out: AHL = quotient
  ;put the output sign in B
  ld b,a
  xor c
  add a,a               ;carry = dividend sign XOR divisor sign
  ld a,b
  rla
  rrca                  ;A = output sign + dividend exponent
  ld b,a


;check for special values
;NaN/x ==> NaN
;0/fin ==> 0
;  0/0 ==> NaN
;inf/inf ==> NaN
;inf/x ==> inf
;x/NaN ==> NaN
;x/inf ==> 0
;x/0 ==> NaN

  and $7f
  jp z,f24div_0_x
  inc a
  jp m,f24div_infnan_x  ;$7F+1 = $80 sets the sign flag

  ld a,c
  and $7f
  jr nz,f24div_infnan_x_nz
  ;x/0: return NaN (A=$FF, HL=$FFFF)
  dec a
  ld h,a
  ld l,a
  ret
f24div_infnan_x_nz:
;despite the name: the dividend is finite and the divisor is nonzero
  inc a
  jp m,f24div_x_infnan


;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) - (C&7F) + 63
  res 7,c
  ld a,b
  and $7f
  add a,63
  sub c
  jr nc,f24div_skip_nc
  xor a     ;underflowed, so return 0
  ret
f24div_skip_nc:
  cp $7f
  jr c,f24div_inf       ;exponent below $7F: no overflow
f24div_return_inf:
  ld a,b
  or %01111111
  ld hl,0   ;overflow so return inf
  ret
f24div_inf:
;despite the name, this is the normal (no overflow) path


;now compute (1.HL / 1.DE)
; = (1+.HL)/(1+.DE)

; want 1.HL>1.DE, because then result is going to be 1.x
;so we can end up doing (.HL-.DE)/(1.DE) to 16 bits precision
  or a                  ;clear carry for the sbc
  ld c,0    ;top bit of 1.HL-1.DE
  sbc hl,de
  jr nc,f24div_ready
  ;if carry is set, then DE was the larger of the two
  ;so we need to decrement the exponent and do
  ;(HL+DE)*2-DE
  dec a     ;decrement exponent
  ret z     ;return 0 if underflowed
  add hl,de             ;undo the subtraction
  add hl,hl
  rl c                  ;C = bit shifted out of HL; carry is now 0
  inc c                 ;plus the doubled implicit 1, less the divisor's 1
  sbc hl,de
  jr nc,f24div_ready
  dec c                 ;borrow from the fractional subtraction
f24div_ready:
;C.HL is the numerator, 1.DE is the denominator
;A is the exponent, B[7] is the sign
;save the exponent and sign
  push bc
  push af

;we can now commence 16  iterations of this division
  call fdiv24_div16     ;HL = 16-bit quotient, A = 0, carry = rounding overflow

  pop de                ;D = exponent saved by `push af`
  pop bc                ;B = output sign
  adc a,d               ;A = exponent + rounding overflow
  jp p,f24div_return_positive
f24div_return_NaN:
;also a jump target for the special-case code, which arrives with A=0 or
;$80 so that HL becomes $FFFF or $7F7F
  dec a
  ld h,a
  ld l,a
f24div_return_positive:
  ;merge: bits 0-6 from A (exponent), bit 7 from B (sign)
  xor b
  and $7f
  xor b
  ret

fdiv24_div16:
;Produce 16 quotient bits of C.HL / 1.DE and round the result.
;In:  C.HL = numerator, DE = denominator significand (implicit leading 1)
;Out: HL = rounded 16-bit quotient, A = 0, BC = 0,
;     carry set if rounding overflowed HL
;Destroys DE
;negate the divisior for more efficient division
;(16-bit addition is cheaper than subtraction)
  xor a
  sub e
  ld e,a
  ld a,0
  sbc a,d
  ld d,a
  sbc a,a
  dec a
  ld b,a                ;B.DE = -(1.DE): B is $FF when DE was 0, else $FE

  ld a,c                ;A.HL is the working remainder
  call fdiv24_div8
  rl c                  ;shift in the last bit: C = first 8 quotient bits
  push bc
  call fdiv24_div8
  rl c                  ;C = last 8 quotient bits
  ;check if  2*A.HL>1.DE
  add hl,hl
  adc a,a
  add hl,de
  adc a,b               ;carry = rounding bit

  pop hl                ;L = first 8 quotient bits (saved C)
  ld h,l
  ld l,c                ;HL = the 16 quotient bits
  ld bc,0
  ld a,b
  adc hl,bc             ;add the rounding bit
  ret

fdiv24_div8:
;Run the one-bit division step fdiv24_div1 eight times. Each label below
;calls the next one and then falls into it, doubling the count:
;div8 = 2 x div4 = 4 x div2 = 8 x div1.
;In:  A.HL = remainder, B.DE = negated divisor, C = quotient bits so far,
;     carry = the quotient bit produced by the previous step
;Out: same registers updated; carry = the newest quotient bit, which the
;     caller shifts into C
  call fdiv24_div4
fdiv24_div4:
  call fdiv24_div2
fdiv24_div2:
  call fdiv24_div1
fdiv24_div1:
  rl c                  ;collect the previous step's quotient bit
  add hl,hl
  adc a,a               ;remainder *= 2
  ret z                 ;top byte is 0: too small to subtract from, bit = 0
  add hl,de
  adc a,b               ;try remainder - divisor (adding the negation)
  ret c                 ;carry out: it fit, quotient bit = 1
  sbc hl,de
  sbc a,b               ;did not fit: restore the remainder, bit = 0
  ret

f24div_0_x:
;Special case for f24div: the dividend is zero.
;make sure we aren't trying 0/NaN or 0/0
  ld a,c
  and $7f
  jr z,f24div_return_NaN    ;0/0 ==> NaN
  inc a
  jp m,f24div_x_infnan      ;0/inf or 0/NaN
  xor a                     ;0/fin ==> 0
  ret
f24div_x_infnan:
;Special case for f24div: the divisor is inf or NaN.
  ld a,d
  or e
  ret z                     ;x/inf ==> 0 (A=0)
  ld a,c                    ;x/NaN ==> return the divisor (a NaN)
  ex de,hl
  ret

f24div_infnan_x:
;Special case for f24div: the dividend is inf or NaN.
;B = output sign + dividend exponent.
  ld a,h
  or l
  ld a,b
  ret nz                    ;NaN/x ==> NaN
  ;make sure x is not inf NaN or 0
  ld a,c
  and $7f
  jr z,f24div_return_NaN    ;inf/0 ==> NaN
  inc a
  jp m,f24div_return_NaN    ;inf/inf and inf/NaN ==> NaN
  ld a,b                    ;inf/fin ==> inf with the output sign
  ret

;
; ---

; +++
;
; Data and Defines
;

; ZX81 system vars
; Addresses of system variables. d_file is written in hex and the rest in
; decimal; all of them lie in the $4000 page. The descriptions below follow
; the ZX81 manual's names -- NOTE(review): confirm against the manual.
d_file:         equ $400c       ; D_FILE: pointer to the display file
df_cc:          equ 16398       ; DF_CC: print position within the display file
last_k:         equ 16421       ; LAST_K: last key scan result
margin:         equ 16424       ; MARGIN: blank lines above/below the picture
s_posn:         equ 16441       ; S_POSN: print column and line
frames:         equ 16436       ; FRAMES: frame counter

; ZX81 ROM functions
; Entry addresses of ROM routines, named for what they are expected to do.
; NOTE(review): save ($02f9) and pause ($0f35) may be entry points a few
; bytes into the respective ROM routines -- verify against a ROM
; disassembly before calling.
kscan:          equ $02bb
findchar:       equ $07bd
stop:           equ $0cdc
slow:           equ $0f2b
fast:           equ $02e7
save:           equ $02f9
printat:        equ $08f5
pause:          equ $0f35
cls:            equ $0a2a

; ZX81 Characters (not ASCII)
; Character codes for building strings with db, as the title in the header
; does. Punctuation uses a two-letter mnemonic, digits are _0_.._9_ and
; letters are _a_.._z_.
_sp:            equ $00         ; space
_qu:            equ $0b         ; "
_lb:            equ $0c         ; pound sign
_dl:            equ $0d         ; $
_cl:            equ $0e         ; :
_lp:            equ $10         ; (
_rp:            equ $11         ; )
_gt:            equ $12         ; >
_lt:            equ $13         ; <
_eq:            equ $14         ; =
_pl:            equ $15         ; +
_mi:            equ $16         ; -
_as:            equ $17         ; *
_sl:            equ $18         ; /
_sc:            equ $19         ; ;
_cm:            equ $1a         ; ,
_pr:            equ $1b         ; .
; digits 0-9 occupy $1c-$25
_0_:            equ $1c
_1_:            equ $1d
_2_:            equ $1e
_3_:            equ $1f
_4_:            equ $20
_5_:            equ $21
_6_:            equ $22
_7_:            equ $23
_8_:            equ $24
_9_:            equ $25
; letters A-Z occupy $26-$3f
_a_:            equ $26
_b_:            equ $27
_c_:            equ $28
_d_:            equ $29
_e_:            equ $2a
_f_:            equ $2b
_g_:            equ $2c
_h_:            equ $2d
_i_:            equ $2e
_j_:            equ $2f
_k_:            equ $30
_l_:            equ $31
_m_:            equ $32
_n_:            equ $33
_o_:            equ $34
_p_:            equ $35
_q_:            equ $36
_r_:            equ $37
_s_:            equ $38
_t_:            equ $39
_u_:            equ $3a
_v_:            equ $3b
_w_:            equ $3c
_x_:            equ $3d
_y_:            equ $3e
_z_:            equ $3f

; end defines
; ---
**MBSARX***SLR/2024** (mbsarx.asm)

ASSEMBLY PROGRAM LISTING

**MBSARX***SLR/2024** (mbsarx.asm)