Skip to content

Commit 0132fd6

Browse files
committed
optimized fpmul multiplication and moved labels into jr range
1 parent a90ffb2 commit 0132fd6

File tree

1 file changed

+71
-62
lines changed

1 file changed

+71
-62
lines changed

src/crt/fpmul.src

Lines changed: 71 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,59 @@
44

55
public __fpmul
66

7+
; Out-of-line special-case exits for __fpmul, placed ahead of __fpmul so that
; every label here can be reached with a short jr from __fpmul.
; __fpmul branches to .nonfinite.1 when its `inc e` wraps to zero and to
; .nonfinite.2 when its `inc d` wraps to zero, both after `call __fppop2`.
; NOTE(review): the stack layout popped below is built by __fppop2 and the
; __fpmul prologue, which are not visible here -- confirm against those.
8+
__fpmul_helper:
9+
.nonfinite: ; reached from .nonfinite.1 when `inc d` also wrapped to zero
10+
sbc hl, bc ; HL = HL - BC - carry; carry is not cleared on this path -- presumably already clear, confirm
11+
jq z, .return.2 ; result zero (HL matched BC): share the .return.2 exit
12+
add hl, bc ; add BC back to HL, undoing the subtraction
13+
pop bc ; BC = top stack entry
14+
jq .return.1
15+
16+
.nonfinite.1: ; entered from __fpmul when `inc e` produced zero (E was 0FFh)
17+
inc d ; Z if D was 0FFh
18+
jq z, .nonfinite ; D wrapped as well: handle in .nonfinite
19+
ex de, hl ; park HL in DE while the stacked value is examined
20+
pop hl ; HL = top stack entry
21+
add hl, bc ; add/sbc pair leaves HL unchanged ...
22+
or a, a ; ... with carry cleared in between ...
23+
sbc hl, bc ; ... and sets Z exactly when HL == 0
24+
ex de, hl ; swap back (flags unaffected): DE = popped value, HL restored
25+
jq nz, .return.1 ; popped value was nonzero
26+
ld h, a ; popped value was zero: H = A (NOTE(review): intent not evident from here -- confirm)
27+
.return.1:
28+
pop af ; AF = next stack entry
29+
ex (sp), hl ; exchange HL with the entry now on top of the stack
30+
pop bc ; BC = the value that was in HL before the exchange
31+
jq .return
32+
33+
.nonfinite.2: ; entered from __fpmul when `inc d` produced zero (D was 0FFh)
34+
add hl, bc ; same idiom as above: test HL == 0 without changing HL
35+
or a, a ; clear carry for the sbc
36+
sbc hl, bc ; Z exactly when HL == 0
37+
jq z, .return.nan ; HL == 0: take the NaN exit
38+
.return.2:
39+
pop bc, af, bc ; pops BC, then AF, then BC again (the second BC pop overwrites the first)
40+
push bc ; put that last value back so .return.pop loads it into HL
41+
.return.pop:
42+
pop hl ; HL = top stack entry
43+
.return:
44+
pop de ; DE = top stack entry (presumably the caller's DE -- the CHECK on __fpmul requires out.DE == in.DE)
45+
or a, 07Fh ; force the low seven bits of A to 1, leaving bit 7 as is; A is the top byte of the aubc result
46+
ret
47+
48+
.return.nan:
49+
pop bc ; BC = top stack entry
50+
set 7, b ; set bit 7 of B (bit 15 of BC); presumably forces a nonzero mantissa so the result is a NaN -- confirm
51+
pop af ; AF = next stack entry
52+
jq .return.pop ; finish through the common pop hl / pop de / ret tail
53+
54+
__fpmul_helper.underflow: ; underflow exit: __fpmul jumps here when `cp a, -23` sets carry
55+
pop af, af, hl, de ; pops AF twice (first value discarded), then HL and DE; layout comes from code not shown here -- confirm
56+
and a, 080h ; keep only bit 7 of A (presumably the result's sign), clearing the other seven bits
57+
ld bc, 0 ; BC = 0, so the aubc result has all bits clear except possibly bit 7 of A (a signed zero)
58+
ret
59+
760
; IEEE single precision multiplication
861
; aubc = aubc * euhl
962
__fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(float, pair8_24_t, { in.BC, in.A }) * bitcast(float, pair8_24_t, { in.HL, in.E })) && out.DE == in.DE && out.HL == in.HL && out.IX == in.IX && out.IY == in.IY
@@ -18,11 +71,11 @@ __fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(fl
1871
ld e, a
1972
call __fppop2
2073
inc e
21-
jq z, .nonfinite.1
74+
jr z, __fpmul_helper.nonfinite.1
2275
inc d
23-
jq z, .nonfinite.2
76+
jr z, __fpmul_helper.nonfinite.2
2477
ld a, d
25-
ld d, b; 0
78+
ld d, b ; ld d, 0
2679
rlc e
2780
ccf
2881
rr e
@@ -38,7 +91,7 @@ __fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(fl
3891
jq z, .subnormal
3992
jq c, .continue
4093
cp a, -23
41-
jq c, .underflow
94+
jr c, __fpmul_helper.underflow
4295
.subnormal:
4396
dec a
4497
ld d, a
@@ -75,26 +128,28 @@ __fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(fl
75128
ld bc, (ix + 12)
76129
mlt bc
77130
add hl, bc
131+
78132
ld (ix + 2), hl
79-
ld hl, (ix + 3)
80-
inc hl
81-
dec.s hl
82-
ld c, (ix + 11)
83-
ld b, (ix + 15)
84-
mlt bc
133+
ld b, (ix + 4)
134+
ld c, h
135+
ld l, (ix + 11)
136+
ld h, (ix + 15)
137+
mlt hl ; clears UHL
85138
add hl, bc
139+
86140
ld c, (ix + 12)
87141
ld b, (ix + 14)
88142
mlt bc
89143
add hl, bc
144+
90145
ld (ix + 3), hl
91-
ld hl, (ix + 4)
92-
inc hl
93-
dec.s hl
94-
ld c, (ix + 12)
95-
ld b, (ix + 15)
96-
mlt bc
146+
ld b, (ix + 5)
147+
ld c, h
148+
ld l, (ix + 12)
149+
ld h, (ix + 15)
150+
mlt hl ; clears UHL
97151
add hl, bc
152+
98153
cp a, d
99154
jq nz, .normalized
100155
ld bc, (ix + 2)
@@ -127,52 +182,6 @@ __fpmul: ; CHECK: same(bitcast(float, pair8_24_t, { out.BC, out.A }), bitcast(fl
127182
ex (sp), hl
128183
pop bc, de
129184
ret
130-
.underflow:
131-
pop af, af, hl, de
132-
and a, 080h
133-
ld bc, 0
134-
ret
135-
.nonfinite:
136-
sbc hl, bc
137-
jq z, .return.2
138-
add hl, bc
139-
pop bc
140-
jq .return.1
141-
.nonfinite.1:
142-
inc d
143-
jq z, .nonfinite
144-
ex de, hl
145-
pop hl
146-
add hl, bc
147-
or a, a
148-
sbc hl, bc
149-
ex de, hl
150-
jq nz, .return.1
151-
ld h, a
152-
.return.1:
153-
pop af
154-
ex (sp), hl
155-
pop bc
156-
jq .return
157-
.nonfinite.2:
158-
add hl, bc
159-
or a, a
160-
sbc hl, bc
161-
jq z, .return.nan
162-
.return.2:
163-
pop bc, af, bc
164-
push bc
165-
.return.pop:
166-
pop hl
167-
.return:
168-
pop de
169-
or a, 07Fh
170-
ret
171-
.return.nan:
172-
pop bc
173-
set 7, b
174-
pop af
175-
jq .return.pop
176185

177186
extern __fppop1
178187
extern __fppop2

0 commit comments

Comments
 (0)