ZX81 Assembly Listing for mbsmc.asm


ZX81 assembly listing for **MBS MC***SLR/2022**

**MBS MC***SLR/2022** (mbsmc.asm)

My attempt at building a Mandelbrot sets program for the ZX81 in Assembly.


ASSEMBLY PROGRAM LISTING

;
; Mandelbrot
; Steven Reid (c) 2022
; v1 12/03/2022 - My attempt at writing a mandelbrot set in assembly.
; v2 12/11/2022 - Cleaned up routines for publish.

; +++
;
; Header and startup
;

        ; start up stuff
        org 16514               ; stored in REM at top (ZX81)
        jr start                ; needed for z80asm

; title and copyright (will show on listing)
copy:
        db _as,_as,_m_,_b_,_s_,_sp,_m_,_c_,_as,_as
        db _as,_s_,_l_,_r_,_sl,_2_,_0_,_2_,_2_,_as
        db _as,$76,$76  ; **MBS MC***SLR/2022**

sloop: db 0,0,0
scale: db 0,0,0
shift: db 0,0,0
x_val: db 0,0,0
y_val: db 0,0,0
cr_val: db 0,0,0
ci_val: db 0,0,0
zr_val: db 0,0,0
zi_val: db 0,0,0

zerox:  equ 32
zeroy:  equ 24
zeroy_f24_1:    equ $43         ; .db $00,$80,$43  ;24
zeroy_f24_2:    equ $8000
maxit:  equ 15
step:   equ 50

start:
        call slow               ; SLOW is required.
        call cls                ; clear screen / exapnd screen

; end header and startup
;
; ---

; +++
;
; Main Loop
;

mainloop:

; for s=0 to 750 step 50
xor a           ; set scaling loop to 0
ld hl,0
ld (sloop),a    ; save scaleing loop variable
ld (sloop+1),hl

scale_loop:

; scale = 1/(s+20)
ld a,(sloop)            ; load scaling loop
ld hl,(sloop+1)
; .db $00,$40,$43  ;20
ld c,$43                ; load cde with 20
ld de,$4000
call f24add             ; s+20

ld c,a                  ; copy result to numerator
ld d,h
ld e,l
; .db $00,$00,$3F  ;1
ld a,$3f                ; set ahl to 1
ld hl,0
call f24div             ; 1/(s+20) (ahl / cde)

ld (scale),a            ; save result in scale
ld (scale+1),hl

; shift = -1.75*s-47
ld a,(sloop)            ; load scaling loop
ld hl,(sloop+1)
;.db $00,$80,$BF  ;-1.5
ld c,$bf
ld de,$8000
call f24mul             ; -1.75*s
;.db $00,$78,$44  ;47
ld c,$44
ld de,$7800
call f24sub             ; -1.75*s-15
ld (shift),a           ; save result in shift
ld (shift+1),hl

; set width loop (x)
;.db $00,$F8,$44  ;63
ld a,$44
ld hl,$f800
ld (x_val),a            ; save result in x_val
ld (x_val+1),hl

; for x = zerox*2-1 to 0 step -1
ld b,zerox*2            ; set x loop value
x_loop:
        push bc

        ; display progress indicator
        dec b
        ld c,zeroy*2-1
        call zxplot


        ; cr = (x+shift)*scale
        ld a,(shift)            ; load shift into cde
        ld c,a
        ld de,(shift+1)
        ld a,(x_val)            ; load x_val into ahl
        ld hl,(x_val+1)
        call f24add             ; ahl+dce

        push af
        ld a,(scale)            ; load scale
        ld c,a
        pop af
        ld de,(scale+1)         ; load rest of scale

        call f24mul             ; (x+shift)*scale

        ld (cr_val),a           ; save result in cr
        ld (cr_val+1),hl

        ; set length loop (y)
        xor a                   ; set ahl to 0
        ld hl,0
        ld (y_val),a           ; save result in y_val
        ld (y_val+1),hl

        pop bc
        ; for y = 0 to zeroy-1
        ld c,zeroy              ; note starts at zeroy as we are counting down
        y_loop:
                push bc

                ; see if user wants to exit (called each run)
                call check_break

                ; ci = (zeroy-y)*scale
                ld a,(y_val)            ; load cde with y_val
                ld c,a
                ld de,(y_val+1)

                ; .db $00,$70,$43  ;23
                ld a,$43                ; set ahl to 23
                ld hl,$7000

                call f24sub             ; ahl-dce (22-y)

                push af
                ld a,(scale)            ; load scale
                ld c,a
                pop af
                ld de,(scale+1)         ; load rest of scale

                call f24mul             ; (zeroy-y)*scale

                ld (ci_val),a           ; save result in ci
                ld (ci_val+1),hl

                ; clear zr and zi
                xor a
                ld hl,0
                ld (zr_val),a           ; zr = 0
                ld (zr_val+1),hl
                ld (zi_val),a           ; zi = 0
                ld (zi_val+1),hl
                
                ; for i = maxit to 0 step -1
                ld b,maxit
                iteration_loop:
                        push bc

                        ; br = cr + zr*zr - zi*zi
                        ld a,(zr_val)           ; load zr
                        ld hl,(zr_val+1)
                        ld c,a                  ; load zr
                        ld de,(zr_val+1)
                        call f24mul             ; zr*zr
                        push af                 ; save zr*zr
                        push hl

                        ld a,(zi_val)           ; load zi
                        ld hl,(zi_val+1)
                        ld c,a                  ; load zi
                        ld de,(zi_val+1)
                        call f24mul             ; zi*zi

                        ld c,a                  ; move zi*zi to cde
                        ld d,h
                        ld e,l
                        pop hl
                        pop af                  ; restore zr*zr
                        call f24sub             ; zr*zr - zi*zi

                        ld c,a                  ; load cde with zr*zr - zi*zi
                        ld d,h
                        ld e,l
                        ld a,(cr_val)           ; load ahl with cr
                        ld hl,(cr_val+1)
                        call f24add             ; cr + (zr*zr - zi*zi)

                        push hl
                        push af                 ; save br (will become zr)

                        ; zi = ci + 2*zr*zi
                        ld a,(zi_val)           ; load zi
                        ld c,a
                        ld de,(zi_val+1)
                        ld a,(zr_val)           ; load zr
                        ld hl,(zr_val+1)
                        call f24mul             ; zr*zi
                        ; .db $00,$00,$40  ;2
                        ld c,$40                ; set cde to 2
                        ld de,0
                        call f24mul             ; (2*zr*zi)

                        ld c,a                  ; load cde with 2*zr*zi
                        ld d,h
                        ld e,l

                        ld a,(ci_val)           ; load ahl with ci
                        ld hl,(ci_val+1)
                        call f24add             ; ci + 2*zr*zi
                        ld (zi_val),a           ; save new zi
                        ld (zi_val+1),hl

                        ; zr = br
                        pop af
                        pop hl
                        ld (zr_val),a           ; save new zr
                        ld (zr_val+1),hl

                        ld c,a                  ; load zr
                        ld de,(zr_val+1)
                        call f24mul             ; zr*zr
                        push af                 ; save zr*zr
                        push hl

                        ld a,(zi_val)           ; load zi
                        ld hl,(zi_val+1)
                        ld c,a                  ; load zi
                        ld de,(zi_val+1)
                        call f24mul             ; zi*zi

                        ld c,a                  ; move zi*zi to cde
                        ld d,h
                        ld e,l
                        pop hl
                        pop af                  ; restore zr*zr
                        call f24add             ; zr*zr + zi*zi

                        ; .db $00,$00,$41  ;4
                        ld c,$41                ; set cde to 4
                        ld de,0
                        call f24cmp             ; compare!
                        pop bc                  ; restore bc
                        
                        ; if > 4, then plot
                        jp c,next_i             ; ahl < cde (c) nope
                        jp z,next_i             ; ahl = cde (z) nope

                        plot_point:             ; yes, plot a point!
                                ld a,1          ; set to plot
                                ld (plot_action),a
                                jr print_pixel  ; and and print it

                        ; next i
                        next_i:
                        dec b
                        jp nz,iteration_loop

                        ; passed through, so unplot
                        xor a                   ; unplot point
                        ld (plot_action),a

                print_pixel:
                        pop bc          ; pop x,y coordinates
                        push bc         ; push them back for later
                        dec b           ; sub x by 1
                        ld a,zeroy      ; zeroy-y to (reverses plot)
                        sub c
                        ld c,a
                        call plot_routine
                        pop bc          ; pop x,y coordinates
                        push bc         ; push them back for later
                        dec b           ; sub x by 1
                        ld a,zeroy-2
                        add a,c
                        ld c,a
                        call plot_routine

        next_y:
                ; y_val + 1
                ld a,(y_val)            ; load y_val
                ld hl,(y_val+1)
                ; .db $00,$00,$3F  ;1
                ld c,$3f                ; set cde to 1
                ld de,0
                call f24add             ; increment y_val
                ld (y_val),a            ; and save it
                ld (y_val+1),hl

                pop bc
                dec c
                jp nz,y_loop

        ; x_val - 1
        push bc
        ld a,(x_val)            ; load x_val
        ld hl,(x_val+1)
        ; .db $00,$00,$3F  ;1
        ld c,$3f                ; set cde to 1
        ld de,0
        call f24sub             ; decrement x_val
        ld (x_val),a            ; and save it
        ld (x_val+1),hl

        ; clear progress marker
        pop bc
        push bc
        dec b
        ld c,zeroy*2-1
        call zxunplot

        pop bc
        dec b
        jp nz,x_loop

        ld hl,$9999             ; longer pause
        call delay

        ; next s
        
        ; increment scaling loop
        ld a,(sloop)            ; load scaling loop
        ld hl,(sloop+1)
        ;.db $00,$90,$43  ;25
        ld c,$43        ; load step with 25
        ld de,$9000
        call f24add     ; S+25
        ld (sloop),a    ; save scaleing loop variable
        ld (sloop+1),hl

        ; are we done?
        ; .db $80,$77,$48  ;751
        ld c,$48        ; load test with 751
        ld de,$7780
        call f24cmp

        jp c,scale_loop ; not done yet! (sloop < 751)

        call fastcls            ; clear sreen

        jp mainloop             ; start again!

; End of loop!
;


; +++
;
; Routines
;

; +++
; ZXPlot
;
;       This routine plots a pixel to the screen.
;
;       Either set plot_action to 0 (white), 1 (black)
;       Or call "zxunplot" or "zxplot" directly
;       Set bc register to x/y coordinates (preserved)
;       x can be 0 to 63
;       y can be 0 to 47
;       all other registers are destroied

; variables
plot_action:    db 1    ; plot = 1, unplot = 0

zxunplot:
        xor a
        ld (plot_action),a
        jr plot_routine

zxplot:
        ld a,1
        ld (plot_action),a

plot_routine:
        ; get coordinates

        ; work out what square on the screen the pixel appears at
        ld a,b          ; ld a with x
        bit 7,a
        ret nz          ; out of bounds
        sub 64
        ret p           ; out of bounds

        ld a,c          ; ld a with y
        bit 7,a
        ret nz          ; out of bounds
        sub 48
        ret p           ; out of bounds

        ld d,0          ; set plotsector to 0

        ld a,b          ; ld a with x
        srl a
        ld b,a          ; save new x
        jr nc,pixelleft
        ld d,1          ; pixel is in right half

pixelleft:
        ld a,c          ; ld a with y
        srl a
        ld c,a          ; save new y
        jr nc,pixelup
        ld a,d          ; load a with plot sector
        set 1,a
        ld d,a          ; pixel is in lower half

pixelup:
        ;now find the correct screen position

        push de         ; save plotsector

        ld hl,(d_file)
        inc hl
        ld a,c          ; load a wity y
        or a            ; cp 0
        jr z,findx      ; already at y,skip
        ld de,33
findy:
        add hl,de
        dec a
        jr nz, findy

findx:
        ld de,$00
        ld e,b          ; load e with x
        add hl,de

        pop de          ; restore plotsector

        ; hl holds plot_orgin!

; are we plotting or unplotting?
        ld a,(plot_action)
        and a
        jp z,unplot_pixel

plot_pixel:
; plot the pixel in the co-ord from plot_origin (square on screen) and
; plotsector (actual pixel)
;   0= top left, 1 = top right, 2 = bottom left, 3 = bottom right

        ld a,d          ; ld a with plotsector
        or a            ; cp 0
        jr z,plottl
        dec a           ; cp 1
        jr z,plottr
        dec a           ; cp 2
        jr z,plotbl

        ;bottom right quadrant
        ld a,(hl)
        bit 7,a   ;collision detect
        ret nz
        set 7,a
        xor $07 ;invert all other bits
        ld (hl),a
        ret  ; plot pixel

plottl: ;top left quadrant
        ld a,(hl)
        bit 7,a
        jr nz,tlinvert
        set 0,a
        ld (hl),a
        ret

tlinvert:
        res 0,a
        ld (hl),a
        ret

plottr: ;top right quadrant
        ld a,(hl)
        bit 7,a
        jr nz,trinvert
        set 1,a
        ld (hl),a
        ret

trinvert:
        res 1,a
        ld (hl),a
        ret

plotbl: ;bottom left quadrant
        ld a,(hl)
        bit 7,a
        jr nz,blinvert
        set 2,a
        ld (hl),a
        ret

blinvert:
        res 2,a
        ld (hl),a
        ret

unplot_pixel:
        ; unplot the pixel in the co-ord from plot_origin (square on screen) and
        ; plotsector (actual pixel)
        ;  0= top left, 1 = top right, 2 = bottom left, 3 = bottom right

        ld a,d          ; ld a with plotsector
        or a            ; cp 0
        jr z,uplottl
        dec a           ; cp 1
        jr z,uplottr
        dec a           ; cp 2
        jr z,uplotbl

        ;bottom right quadrant
        ld a,(hl)
        bit 7,a
        ret z
        res 7,a
        xor $07 ;invert all other bits
        ld (hl),a
        ret ;unplot pixel

uplottl: ;top left quadrant
        ld a,(hl)
        bit 7,a
        jr nz,utlinvert
        res 0,a
        ld (hl),a
        ret

utlinvert:
        set 0,a
        ld (hl),a
        ret

uplottr: ;top right quadrant
        ld a,(hl)
        bit 7,a
        jr nz,utrinvert
        res 1,a
        ld (hl),a
        ret

utrinvert:
        set 1,a
        ld (hl),a
        ret


uplotbl: ;bottom left quadrant
        ld a,(hl)
        bit 7,a
        jr nz,ublinvert
        res 2,a
        ld (hl),a
        ret

ublinvert:
        set 2,a
        ld (hl),a
        ret

; end zxplot
; ---

; +++
; Break
;
; preserves state, but will exit if SPACE is pushed

check_break:
        exx                     ; save register states

        ; did the player press break key (space)?
        call $0f46              ; was break pressed? (break-1 ROM routine)
        jr nc,break             ; no, exit as normal

        exx                     ; restore registers
        ret                     ; and return

        ; yes, exit the program as normal
break:
        rst $0008               ; call ERROR-1 reset
        db $ff                  ; with error code 0 (normal exit)

; end break
; ---

; +++
; Delay
;
; set bc to speed
; uses check_break to exit

delay_count: dw $0000
delay:
        ld (delay_count),hl     ; save delay

        call check_break

        ; check if done
        ld hl,(delay_count)     ; grab what to test
        dec hl                  ; subtract 1
        ld a,h                  ; check if done
        or l
        jr nz,delay             ; not zero, keep going!

        ret                     ; pause is done!

; end delay
; ---


; +++
; Fast CLS
;       
; in:           none
; out:          none
; preserves:    none
; destroys:     bc
;
fastcls:
        ld hl,(d_file)
        inc hl
        xor a
        ld c,24         ; rows
fastcls_y_loop:
        ld b,32         ; columns
fastcls_x_loop:
        ld (hl),a       ; clear character
        inc hl
        djnz fastcls_x_loop

        inc hl          ; move past return

        dec c
        jr nz,fastcls_y_loop

        ret

; end fast cls
; ---

; +++
; Floating math
;
; these are from: https://github.com/Zeda/z80float/blob/master/f24/f24div.z80

f24cmp:
;returns the flags for float AHL minus float CDE
;   AHL >= CDE, nc
;   AHL < CDE,  c
;   AHL == CDE, z (and nc)
;
;Note:
;   This allows some wiggle room in the bottom two bits. For example, if the two
;   exponents are the same and the two significands differ by at most 3, they are
;   considered equal.
;
;Note:
;   NaN is a special case. This routines returns that NaN= 15
;check if (B&7F) > 14 + (A&7F)
  ld c,a    ;new exponent, need to save the sign for later comparison
  res 7,b
  and $7f
  add a,14
  sub b
  jr nc,f24cmp_skip_nc
  xor a
  ret

f24cmp_skip_nc:
;otherwise, not equal, so let's return the sign in c and nz
  ld a,c
return_nz_sign_a:
  or $7f
  add a,a
  ret

f24cmp_special:
  ld a,h
  or l
  ccf
  ret nz

;so the first op is inf

;if second of is finite, return the sign of B in carry and nz

  ld a,c
  and $7f
  inc a
  ld a,b
  jp p,return_nz_sign_a

;second op is either NaN or inf
  ld a,d
  or e
  ret nz

; op1 op2 result
; 7F  7F  z, nc
; 7F  FF  nz,nc
; FF  7F  nz,c
; FF  FF  z, nc
  ld a,c
  cp b
  ret

f24add:
;AHL + CDE ==> AHL
;Destroys BC,DE
;
;save A
  ld b,a

;check for special values
  and $7f
  jr nz,return_CDE_skip
return_CDE:
  ld a,c
  ex de,hl
  ret
return_CDE_skip:
  inc a
  jp m,f24add_op1_inf_nan

  ld a,c
;check for special values
  and $7f
  jp z,return_exp_b

  inc a
  jp m,return_CDE


  ld a,b
  xor c
  jp m,f24add_subtract
;we need to add

  call f24add_reorder
  jr z,f24add_add_same_exp
  ret nc
  push bc
  call rshift_1DE
  sla b
  adc hl,de
  ;if carry is reset, then we are all good :)
  pop de
  ld a,d
  ret nc
;otherwise, we need to increment the sign and see if it overflows to inf
  and $7f
  cp $7e
  ld a,d
  jr z,f24_return_inf
  inc a

;we also need to shift a 0 down into the HL
  srl h
  rr l
  ret nc
  inc hl
  ret

f24add_add_same_exp:
  ld a,b
  and $7f
  cp $7e
  ld a,b
  jr z,f24_return_inf
  inc a
  add hl,de
  rr h
  rr l
  ret nc
  inc l
  ret nz
  inc h
  ret nz
  inc a
  ret

f24_return_inf:
  or %01111111
  ld hl,0
  ret


f24add_subtract:
  call f24add_reorder
  jr z,f24add_subtract_same_exp
  ret nc
  push bc
  call rshift_1DE
  sub c
  ld c,a
  ld a,0
  sbc a,b
  ld b,a
  sbc hl,de
  ;if carry is not set, then we are all good :)
  pop de
  ld a,d
  ret nc

  ;otherwise, the implicit bit is set to 0, so we need to renormalize
normalize_D_HLBC:
;D is the sign+exponent
;HLBC is the significand
;returns AHLBC
  ;make sure HLBC is not 0
  ld a,h
  or l
  or b
  or c
  ret z

  ld a,d
normalize_D_HLBC_nonzero:
  ;save the sign
  add a,a
  push af
  rrca

f24add_loop:
  dec a
  jr z,f24add_skip_ahead
  sla c
  rl b
  adc hl,hl
  jp nc,f24add_loop
  ;now round
  sla c
  ld bc,0
  adc hl,bc
  ;if carry is set, then the implicit bit is 2, and the rest of the exponent is 0
  ;so we can just increment A and keep HL as 0
  adc a,b
  add a,a
f24add_skip_ahead:
  ld d,a
  pop af
  ld a,d
  rra
  ret

f24add_subtract_same_exp:
;subtract the significands
  ld a,b
;  or a
  sbc hl,de

;if zero, then the result is zero, but we'll keep the same sign
  jr nz,f24_skip_ahead_nz
  and %10000000
  ret

f24_skip_ahead_nz:
  ;if the carry flag is set, then we need to change the sign of the output
  ;and negate the significand. if reset, then we still need to normalize and whatnot
  ld bc,0
  jr nc,normalize_D_HLBC_nonzero
  xor $80
  ld d,a
  xor a
  sub l
  ld l,a
  sbc a,a
  sub h
  ld h,a
  ld a,d
  jr normalize_D_HLBC_nonzero

f24add_reorder:
  xor c
  rlc c
  rla
;Want to rearrange so that A-C>=0
  sub c
  ret z
  jr nc,f24add_reorder_nc
  neg
  ;A is the difference in exponents
  rrc c
  ld b,c
  ex de,hl
f24add_reorder_nc:
;A is how many bits to shift DE right
;B is the sign+exponent of the result
  or a
  rra
  cp 18
  ret c
return_exp_b:
  ld a,b
  ret

f24add_op1_inf_nan:
  ld a,h
  or l
  jr nz,return_exp_b
;so op1 is +inf or -inf
;If op2 is finite, then just return op1
  or c
  jr z,return_exp_b
  inc a
  add a,a
  jr nz,return_exp_b

;if op2 is NaN, return NaN
  ld a,d
  or e
  ld a,c
  jr nz,f24add_op1_inf_nan_nz

;so |op1| and |op2| are inf
;if they have the same sign, fine, else return NaN
  cp b
  ret z
f24add_op1_inf_nan_nz:
  dec hl
  ret

rshift_1DE:
  ld bc,0
  scf
rshift_1DE_loop:
  rr d
  rr e
  rr b
  rr c
  dec a
  jr nz,rshift_1DE_loop
  ret


f24sub:
;AHL - CDE ==> AHL
;Destroys BC,DE
;
  ld b,a
  ld a,c
  xor $80
  ld c,a
  ld a,b
  jp f24add

f24rsub:
;-AHL + CDE ==> AHL
;Destroys BC,DE
;
  xor $80
  jp f24add

f24mul:
;AHL * CDE ==> AHL
;Destroys BC,DE
;

;put the output sign in the top bit of A
  ld b,a
  ld a,c
  and $80
  xor b

;check for special values
;NaN*x ==> NaN
;0*fin ==> 0
;0*inf ==> NaN
;inf*fin ==> inf
;inf*inf ==> inf

;save A
  ld b,a
  and $7f
  jr z,f24mul_op1_0
  inc a
  jp m,f24mul_op1_inf_nan

;so the first value is finite
  ld a,c
  and $7f
  ld c,a
  ret z
  inc a
  jp m,return_CDE

;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) + (C&7F) - 63
  ld a,b
  and $7f
  add a,c
  sub 63
  jr nc,f24mul_nc
  xor a     ;underflowed, so return 0
  ret
f24mul_nc:
  cp $7f
  jr c,f24mul_return_inf_carry
f24mul_return_inf:
  ld a,b
  or %01111111
  ld hl,0   ;overflow so return inf
  ret
f24mul_return_inf_carry:

  xor b
  and $7f
  xor b
f24mul_significand:
;save the exponent
  push af

;now compute (1.HL * 1.DE)
; = (1+.HL)(1+.DE)
; = 1+.HL+.DE+.HL*.DE
  ld b,h
  ld c,l
  push hl
  push de
  call mul16
  ;result is .DEHL
  ;we can discard HL, but first round
  xor a
  sla h
  ex de,hl
  pop bc
  adc hl,bc
  adc a,0
  pop bc
  add hl,bc
  adc a,0
  rra
;now 1+A.HL is the significand
  pop bc    ;B is the exponent
  ld a,b
  ret z
  ccf
  rr h
  rr l
  inc a
  inc a
  add a,a
  jr z,f24mul_return_inf
  rra
  dec a
  ret

f24mul_op1_0:
  ld a,c
  and $7f
  ret z
  inc a
  jp m,f24mul_return_NaN
  xor a
  ret

f24mul_op1_inf_nan:
  ld a,h
  or l
  ld a,b
  ret nz    ;NaN

;inf*0 is NaN
  ld a,c
  and $7f
  jr nz,f24mul_return_NaN_nz
f24mul_return_NaN:
  dec a   ;inf*0
  ld h,a  ;=
  ld l,a  ;NaN
  ret
f24mul_return_NaN_nz:
  inc a
  jp m,f24mul_return_NaN_m
  ld a,b    ;returning inf
  ret
f24mul_return_NaN_m:

;op1 is inf
;op2 is is either NaN or inf
; inf*NaN ==> NaN
; inf*inf ==> inf
;so just return op2's significand
  ld a,c
  ex de,hl
  ret

;This was made by Runer112
;Tested by jacobly
mul16:
;BC*DE --> DEHL
; ~544.887cc as calculated in jacobly's test
;min: 214cc  (DE = 1)
;max: 667cc
;avg: 544.4507883cc   however, deferring to jacobly's result as mine may have math issues ?
;177 bytes
	ld	a,d
	ld	d,0
	ld	h,b
	ld	l,c
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit14
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit13
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit12
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit11
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit10
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit9
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit8
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit7
	ld	a,e
 	and	%11111110
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit6
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit5
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit4
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit3
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit2
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit1
	add	a,a
	jr	c,Mul_BC_DE_DEHL_Bit0
	rr	e
	ret	c
	ld	h,d
	ld	l,e
	ret

Mul_BC_DE_DEHL_Bit14:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit13
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit13:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit12
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit12:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit11
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit11:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit10
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit10:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit9
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit9:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit8
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit8:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit7
	add	hl,bc
	adc	a,d
Mul_BC_DE_DEHL_Bit7:
	ld	d,a
	ld	a,e
	and	%11111110
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit6
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit6:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit5
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit5:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit4
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit4:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit3
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit3:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit2
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit2:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit1
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit1:
	add	hl,hl
	adc	a,a
	jr	nc,Mul_BC_DE_DEHL_Bit0
	add	hl,bc
	adc	a,0
Mul_BC_DE_DEHL_Bit0:
	add	hl,hl
	adc	a,a
	jr	c,Mul_BC_DE_DEHL_FunkyCarry
	rr	e
	ld	e,a
	ret	nc
	add	hl,bc
	ret	nc
	inc	e
	ret	nz
	inc	d
	ret

Mul_BC_DE_DEHL_FunkyCarry:
	inc	d
	rr	e
	ld	e,a
	ret	nc
	add	hl,bc
	ret	nc
	inc	e
	ret


;f24inv:
  ld c,a
  ex de,hl
  ld a,$3f
  ld hl,0
f24div:
;AHL * CDE ==> AHL
;Destroys BC,DE
;
  ;put the output sign in B
  ld b,a
  xor c
  add a,a
  ld a,b
  rla
  rrca
  ld b,a


;check for special values
;NaN/x ==> NaN
;0/fin ==> 0
;  0/0 ==> NaN
;inf/inf ==> NaN
;inf/x ==> inf
;x/NaN ==> NaN
;x/inf ==> 0
;x/0 ==> NaN

  and $7f
  jp z,f24div_0_x
  inc a
  jp m,f24div_infnan_x

  ld a,c
  and $7f
  jr nz,f24div_infnan_x_nz
  dec a
  ld h,a
  ld l,a
  ret
f24div_infnan_x_nz:
  inc a
  jp m,f24div_x_infnan


;upper bit of B is the output sign
;first approximation of the exponent is
; (B&7F) - (C&7F) + 63
  res 7,c
  ld a,b
  and $7f
  add a,63
  sub c
  jr nc,f24div_skip_nc
  xor a     ;underflowed, so return 0
  ret
f24div_skip_nc:
  cp $7f
  jr c,f24div_inf
f24div_return_inf:
  ld a,b
  or %01111111
  ld hl,0   ;overflow so return inf
  ret
f24div_inf:


;now compute (1.HL / 1.DE)
; = (1+.HL)/(1+.DE)

; want 1.HL>1.DE, because then result is going to be 1.x
;so we can end up doing (.HL-.DE)/(1.DE) to 16 bits precision
  or a
  ld c,0    ;top bit of 1.HL-1.DE
  sbc hl,de
  jr nc,f24div_ready
  ;if carry is set, then DE was the larger of the two
  ;so we need to decrement the exponent and do
  ;(HL+DE)*2-DE
  dec a     ;decrement exponent
  ret z     ;return 0 if underflowed
  add hl,de
  add hl,hl
  rl c
  inc c
  sbc hl,de
  jr nc,f24div_ready
  dec c
f24div_ready:
;C.HL is the numerator, 1.DE is the denominator
;A is the exponent, B[7] is the sign
;save the exponent and sign
  push bc
  push af

;we can now commence 16  iterations of this division
  call fdiv24_div16

  pop de
  pop bc
  adc a,d
  jp p,f24div_return_positive
f24div_return_NaN:
  dec a
  ld h,a
  ld l,a
f24div_return_positive:
  xor b
  and $7f
  xor b
  ret

fdiv24_div16:
;negate the divisior for more efficient division
;(16-bit addition is cheaper than subtraction)
  xor a
  sub e
  ld e,a
  ld a,0
  sbc a,d
  ld d,a
  sbc a,a
  dec a
  ld b,a

  ld a,c
  call fdiv24_div8
  rl c
  push bc
  call fdiv24_div8
  rl c
  ;check if  2*A.HL>1.DE
  add hl,hl
  adc a,a
  add hl,de
  adc a,b

  pop hl
  ld h,l
  ld l,c
  ld bc,0
  ld a,b
  adc hl,bc
  ret

fdiv24_div8:
  call fdiv24_div4
fdiv24_div4:
  call fdiv24_div2
fdiv24_div2:
  call fdiv24_div1
fdiv24_div1:
  rl c
  add hl,hl
  adc a,a
  ret z
  add hl,de
  adc a,b
  ret c
  sbc hl,de
  sbc a,b
  ret

f24div_0_x:
;make sure we aren't trying 0/NaN or 0/0
  ld a,c
  and $7f
  jr z,f24div_return_NaN
  inc a
  jp m,f24div_x_infnan
  xor a
  ret
f24div_x_infnan:
  ld a,d
  or e
  ret z
  ld a,c
  ex de,hl
  ret

f24div_infnan_x:
  ld a,h
  or l
  ld a,b
  ret nz
  ;make sure x is not inf NaN or 0
  ld a,c
  and $7f
  jr z,f24div_return_NaN
  inc a
  jp m,f24div_return_NaN
  ld a,b
  ret

;
; ---

; +++
;
; Data and Defines
;

; ZX81 system vars
d_file:         equ $400c
df_cc:          equ 16398
last_k:         equ 16421
margin:         equ 16424
s_posn:         equ 16441
frames:         equ 16436

; ZX81 ROM functions
kscan:          equ $02bb
findchar:       equ $07bd
stop:           equ $0cdc
slow:           equ $0f2b
fast:           equ $02e7
save:           equ $02f9
printat:        equ $08f5
pause:          equ $0f35
cls:            equ $0a2a

; ZX81 Characters (not ASCII)
_sp:            equ $00
_qu:            equ $0b
_lb:            equ $0c
_dl:            equ $0d
_cl:            equ $0e
_lp:            equ $10
_rp:            equ $11
_gt:            equ $12
_lt:            equ $13
_eq:            equ $14
_pl:            equ $15
_mi:            equ $16
_as:            equ $17
_sl:            equ $18
_sc:            equ $19
_cm:            equ $1a
_pr:            equ $1b
_0_:            equ $1c
_1_:            equ $1d
_2_:            equ $1e
_3_:            equ $1f
_4_:            equ $20
_5_:            equ $21
_6_:            equ $22
_7_:            equ $23
_8_:            equ $24
_9_:            equ $25
_a_:            equ $26
_b_:            equ $27
_c_:            equ $28
_d_:            equ $29
_e_:            equ $2a
_f_:            equ $2b
_g_:            equ $2c
_h_:            equ $2d
_i_:            equ $2e
_j_:            equ $2f
_k_:            equ $30
_l_:            equ $31
_m_:            equ $32
_n_:            equ $33
_o_:            equ $34
_p_:            equ $35
_q_:            equ $36
_r_:            equ $37
_s_:            equ $38
_t_:            equ $39
_u_:            equ $3a
_v_:            equ $3b
_w_:            equ $3c
_x_:            equ $3d
_y_:            equ $3e
_z_:            equ $3f

; end defines
; ---