@@ -34,48 +34,39 @@ declare { ptr, i20, i20 } @llvm.aie2.add.3d(ptr, i20, i20, i20, i20, i20, i20, i
34
34
define void @add2d (ptr noalias %params , ptr noalias %ifm1_data , ptr noalias %ifm2_data , ptr noalias %ofm_data , ptr %.out , ptr %conv.i.i.i.out , ptr %idx.ext9.out , ptr %.out1 , ptr %.out2 , ptr %.out3 , ptr %.out4 , ptr %.out5 , ptr %conv.i.i.i.i.i.out , ptr %.out6 , ptr %conv.i.i.i46.out , ptr %xtraiter.out , ptr %in_ptr1.051.unr.ce.out , ptr %in_ptr2.0.in50.unr.ce.out , ptr %out_ptr.049.unr.ce.out , ptr %itr_left_cnt0.048.unr.ce.out , ptr %itr_left_cnt1.047.unr.ce.out ) #3 {
35
35
; ASM-LABEL: add2d:
36
36
; ASM: // %bb.0: // %newFuncRoot
37
- ; ASM-NEXT: lda r2, [p0, #64]; paddb [p0], #40; nopm
37
+ ; ASM-NEXT: lda r2, [p0, #64]; paddb [p0], #40; nopxm ; nops
38
38
; ASM-NEXT: lda m2, [p0], #-4
39
39
; ASM-NEXT: lda m5, [p0], #8
40
40
; ASM-NEXT: lda m4, [p0], #8
41
- ; ASM-NEXT: lda m3, [p0], #-24; paddb [sp], #32
42
- ; ASM-NEXT: lda r0, [p0], #4; st p6, [sp, #-28] // 4-byte Folded Spill
43
- ; ASM-NEXT: lda r1, [p0], #-12; mov p6, sp
44
- ; ASM-NEXT: lda r3, [p0], #40; paddb [p6], #-36
45
- ; ASM-NEXT: lda p7, [p6, #0]; mov p6, sp
46
- ; ASM-NEXT: paddb [p6], #-40
47
- ; ASM-NEXT: lda r5, [p6, #0]; mov p6, sp
48
- ; ASM-NEXT: lda m1, [p0], #36; paddb [p6], #-44
49
- ; ASM-NEXT: lda p6, [p6, #0]
50
- ; ASM-NEXT: lda m0, [p0], #-8; st p7, [sp, #-32] // 4-byte Folded Spill
51
- ; ASM-NEXT: lda dn0, [p0], #-8; st r3, [p4, #0]
52
- ; ASM-NEXT: lda dj0, [p0], #12; nez r4, r1; mov p4, sp
53
- ; ASM-NEXT: lda dn4, [p0], #-8; paddb [p4], #-48; st r4, [p5, #0]
54
- ; ASM-NEXT: lda p4, [p4, #0]; mov p5, sp
55
- ; ASM-NEXT: lda dj4, [p0], #-36; st m1, [p7, #0]
56
- ; ASM-NEXT: lda r1, [p0, #0]; mov p7, r5
57
- ; ASM-NEXT: lda r5, [p0, #-36]; paddb [p5], #-52; mov p0, sp
58
- ; ASM-NEXT: lda p5, [p5, #0]; st m0, [p7, #0]
59
- ; ASM-NEXT: paddb [p0], #-72; mov p7, sp
60
- ; ASM-NEXT: lda p0, [p0, #0]; paddb [p7], #-56; st dj0, [p6, #0]
61
- ; ASM-NEXT: lda r6, [p7, #0]; mov p6, sp
62
- ; ASM-NEXT: paddb [p6], #-60; mov p7, sp
63
- ; ASM-NEXT: lda r7, [p6, #0]; paddb [p7], #-64; mov p6, sp
64
- ; ASM-NEXT: lda p7, [p7, #0]; st dj4, [p4, #0]
65
- ; ASM-NEXT: mov p4, sp
66
- ; ASM-NEXT: paddb [p4], #-76; st dn0, [p5, #0]
67
- ; ASM-NEXT: lda r11, [p4, #0]; paddb [p6], #-68; mov p4, sp
68
- ; ASM-NEXT: lda r8, [p6, #0]; paddb [p4], #-80; mov p5, r6
69
- ; ASM-NEXT: lda p6, [p4, #0]; mov p4, sp
70
- ; ASM-NEXT: mova r6, #1; paddb [p4], #-84; nez r1, r1; st dn4, [p5, #0]
71
- ; ASM-NEXT: lda r9, [p4, #0]; ne r6, r0, r6; mov p4, sp
72
- ; ASM-NEXT: mova r0, #3; paddb [p4], #-88; add r7, r2, #-1; mov p5, r7
73
- ; ASM-NEXT: lda r10, [p4, #0]; ltu r7, r7, r0; mov p4, sp
74
- ; ASM-NEXT: jz r7, #.LBB0_2
75
- ; ASM-NEXT: paddb [p4], #-92; st r1, [p5, #0] // Delay Slot 5
76
- ; ASM-NEXT: lda p4, [p4, #0]; st r5, [p7, #0] // Delay Slot 4
77
- ; ASM-NEXT: paddb [p2], m5; mov p7, r8 // Delay Slot 3
78
- ; ASM-NEXT: st r6, [p7, #0]; paddb [p2], m4; and r8, r2, r0 // Delay Slot 2
41
+ ; ASM-NEXT: lda m3, [p0], #-24
42
+ ; ASM-NEXT: lda r0, [p0], #4
43
+ ; ASM-NEXT: lda r1, [p0], #-12
44
+ ; ASM-NEXT: lda r3, [p0], #40
45
+ ; ASM-NEXT: lda m1, [p0], #36
46
+ ; ASM-NEXT: lda m0, [p0], #-8
47
+ ; ASM-NEXT: lda dn0, [p0], #-8
48
+ ; ASM-NEXT: lda dj0, [p0], #12
49
+ ; ASM-NEXT: lda dn4, [p0], #-8; paddb [sp], #32
50
+ ; ASM-NEXT: lda dj4, [p0], #-36; st p7, [sp, #-32] // 4-byte Folded Spill
51
+ ; ASM-NEXT: lda r1, [p0, #0]; mov p7, sp
52
+ ; ASM-NEXT: lda r5, [p0, #-36]; paddb [p7], #-36
53
+ ; ASM-NEXT: lda p0, [p7], #-4
54
+ ; ASM-NEXT: lda p0, [p7], #-4
55
+ ; ASM-NEXT: lda p0, [p7], #-4
56
+ ; ASM-NEXT: lda p0, [p7], #-4
57
+ ; ASM-NEXT: lda p0, [p7], #-4; st p6, [sp, #-28]; nez r4, r1 // 4-byte Folded Spill
58
+ ; ASM-NEXT: lda p5, [p7], #-4; st r3, [p4, #0]
59
+ ; ASM-NEXT: lda p6, [p7], #-4; st r4, [p5, #0]
60
+ ; ASM-NEXT: lda p0, [p7], #-4; st m1, [p0, #0]; add r7, r2, #-1; mov r6, #1
61
+ ; ASM-NEXT: lda p4, [p7], #-4; st m0, [p0, #0]; ne r6, r0, r6
62
+ ; ASM-NEXT: lda r13, [p7], #-4; st dj0, [p0, #0]; movx r0, #3
63
+ ; ASM-NEXT: st dj4, [p0, #0]; ltu r7, r7, r0
64
+ ; ASM-NEXT: lda r9, [p7], #-4; st dn0, [p0, #0]; nez r1, r1
65
+ ; ASM-NEXT: lda r10, [p7], #-4; st dn4, [p5, #0]; jz r7, #.LBB0_2
66
+ ; ASM-NEXT: lda r11, [p7], #-4; st r1, [p6, #0] // Delay Slot 5
67
+ ; ASM-NEXT: lda p7, [p7, #-4]; st r5, [p0, #0] // Delay Slot 4
68
+ ; ASM-NEXT: paddb [p2], m5; st r6, [p4, #0] // Delay Slot 3
69
+ ; ASM-NEXT: lda r12, [p7, #0]; paddb [p2], m4; and r8, r2, r0; mov p0, r13 // Delay Slot 2
79
70
; ASM-NEXT: padda [p1], m2; paddb [p2], m3; movx r0, #0; st r8, [p0, #0] // Delay Slot 1
80
71
; ASM-NEXT: // %bb.1:
81
72
; ASM-NEXT: j #.LBB0_5
@@ -123,18 +114,20 @@ define void @add2d(ptr noalias %params, ptr noalias %ifm1_data, ptr noalias %ifm
123
114
; ASM-NEXT: nop
124
115
; ASM-NEXT: nop
125
116
; ASM-NEXT: vst.srs.d8.s32 cm0, s0, [p3], #32
126
- ; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32; mov crUPSSign, #0
127
- ; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov r0, dc0
128
- ; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32; mov r1, dc4
117
+ ; ASM-NEXT: vst.srs.d8.s32 cm2, s0, [p3], #32
118
+ ; ASM-NEXT: vst.srs.d8.s32 cm3, s0, [p3], #32; mov crUPSSign, #0
119
+ ; ASM-NEXT: vst.srs.d8.s32 cm1, s0, [p3], #32; mov r0, dc0
120
+ ; ASM-NEXT: mov r1, dc4
129
121
; ASM-NEXT: mov crSRSSign, #0
130
122
; ASM-NEXT: .LBB0_5: // %for.cond.cleanup.unr-lcssa.split
131
- ; ASM-NEXT: nopx ; mov p0, r10
132
- ; ASM-NEXT: lda p7, [sp, #-32]; st r1, [p4, #0] // 4-byte Folded Reload
133
- ; ASM-NEXT: lda p6, [sp, #-28]; st r0, [p0, #0] // 4-byte Folded Reload
134
- ; ASM-NEXT: ret lr ; mov p0, r9
135
- ; ASM-NEXT: st p3, [p0, #0] // Delay Slot 5
136
- ; ASM-NEXT: mov p0, r11 // Delay Slot 4
137
- ; ASM-NEXT: st p2, [p6, #0] // Delay Slot 3
123
+ ; ASM-NEXT: st r1, [p7, #0]; nopx
124
+ ; ASM-NEXT: mov p0, r12
125
+ ; ASM-NEXT: lda p7, [sp, #-32]; st r0, [p0, #0] // 4-byte Folded Reload
126
+ ; ASM-NEXT: lda p6, [sp, #-28]; mov p0, r11 // 4-byte Folded Reload
127
+ ; ASM-NEXT: st p3, [p0, #0]; ret lr
128
+ ; ASM-NEXT: mov p0, r10 // Delay Slot 5
129
+ ; ASM-NEXT: st p2, [p0, #0] // Delay Slot 4
130
+ ; ASM-NEXT: mov p0, r9 // Delay Slot 3
138
131
; ASM-NEXT: st p1, [p0, #0] // Delay Slot 2
139
132
; ASM-NEXT: paddb [sp], #-32 // Delay Slot 1
140
133
newFuncRoot:
0 commit comments