Skip to content

Commit

Permalink
[WAW reg] ignore registers that are neither vector nor accumulator registers
Browse files Browse the repository at this point in the history
  • Loading branch information
F-Stuckmann committed Oct 4, 2024
1 parent 80ae887 commit 4a65ca1
Show file tree
Hide file tree
Showing 15 changed files with 139 additions and 60 deletions.
27 changes: 27 additions & 0 deletions llvm/lib/Target/AIE/AIE2RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,30 @@ AIE2RegisterInfo::getCoveringSubRegs(const TargetRegisterClass &RC) const {
}
return Subregs;
}

bool AIE2RegisterInfo::isVecOrAccRegClass(const TargetRegisterClass &RC) const {
// ******** Vector classes ********
if (AIE2::VEC128RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC256RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC512RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::VEC1024RegClass.hasSubClassEq(&RC))
return true;

// ******** Accumulator classes ********
if (AIE2::ACC256RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::ACC512RegClass.hasSubClassEq(&RC))
return true;

if (AIE2::ACC1024RegClass.hasSubClassEq(&RC))
return true;

return false;
}
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIE2RegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ struct AIE2RegisterInfo : public AIE2GenRegisterInfo {
/// Returns a pointer to AIE2::eDSRegClass, which (going by this method's
/// name) is the register class AIE2 uses for 3D iterators.
const TargetRegisterClass *get3DIteratorRegClass() const override {
return &AIE2::eDSRegClass;
}
bool isVecOrAccRegClass(const TargetRegisterClass &RC) const override;
};
} // namespace llvm

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ struct AIEBaseRegisterInfo : public TargetRegisterInfo {
/// Unconditionally answers false, whatever \p PhysReg is.
/// NOTE(review): presumably this means no reserved register is ever treated
/// as simplifiable on AIE targets — confirm against the contract of the
/// overridden hook.
bool isSimplifiableReservedReg(MCRegister PhysReg) const override {
return false;
}

/// Hook asking whether \p RC is a vector or accumulator register class.
/// This base version provides no answer: reaching it trips
/// llvm_unreachable, so a concrete target is expected to override it.
virtual bool isVecOrAccRegClass(const TargetRegisterClass &RC) const {
llvm_unreachable("Target didn't implement isVecOrAccRegClass()");
}

#if 0
/// Returns a BitVector of the intersection of GPR RegClass
/// and CalleeSaved Registers
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AIE/AIEWawRegRewriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,9 @@ bool AIEWawRegRewriter::isWorthRenaming(const Register &Reg,
if (!UsedPhysRegs[VRM->getPhys(Reg)])
return false;

if (!TRI->isVecOrAccRegClass(*(MRI->getRegClass(Reg))))
return false;

return !VRegWithCopies[Reg.virtRegIndex()];
}

Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/GlobalISel/legalize-dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; CHECK-NEXT: nopa ; nopx ; mov p6, sp
; CHECK-NEXT: mov p1, sp
; CHECK-NEXT: lshl r0, r17, r19
; CHECK-NEXT: add r1, r0, #31
; CHECK-NEXT: add r0, r0, #31
; CHECK-NEXT: jl #extern_call
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
; CHECK-NEXT: and r2, r1, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
; CHECK-NEXT: and r0, r0, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r0 // Delay Slot 3
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
; CHECK-NEXT: nopa ; nopb ; add r17, r17, #1; nopm ; nops
; CHECK-NEXT: ltu r3, r17, r16
; CHECK-NEXT: xor r4, r17, r18
; CHECK-NEXT: add r21, r21, r3
; CHECK-NEXT: or r5, r4, r21
; CHECK-NEXT: jnz r5, #.LBB1_1
; CHECK-NEXT: ltu r0, r17, r16
; CHECK-NEXT: add r21, r21, r0
; CHECK-NEXT: xor r0, r17, r18
; CHECK-NEXT: or r0, r0, r21
; CHECK-NEXT: jnz r0, #.LBB1_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/dyn-stackalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ define void @test_loop_dyn_alloca(i32 noundef %n) {
; CHECK-NEXT: nopa ; nopx ; mov p6, sp
; CHECK-NEXT: mov p1, sp
; CHECK-NEXT: lshl r0, r17, r19
; CHECK-NEXT: add r1, r0, #31
; CHECK-NEXT: add r0, r0, #31
; CHECK-NEXT: jl #extern_call
; CHECK-NEXT: mov p0, p1 // Delay Slot 5
; CHECK-NEXT: and r2, r1, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r2 // Delay Slot 3
; CHECK-NEXT: and r0, r0, r20 // Delay Slot 4
; CHECK-NEXT: mov m0, r0 // Delay Slot 3
; CHECK-NEXT: paddb [p1], m0 // Delay Slot 2
; CHECK-NEXT: mov sp, p1 // Delay Slot 1
; CHECK-NEXT: nopa ; nopb ; add r17, r17, #1; nopm ; nops
; CHECK-NEXT: ltu r3, r17, r16
; CHECK-NEXT: xor r4, r17, r18
; CHECK-NEXT: add r21, r21, r3
; CHECK-NEXT: or r5, r4, r21
; CHECK-NEXT: jnz r5, #.LBB1_1
; CHECK-NEXT: ltu r0, r17, r16
; CHECK-NEXT: add r21, r21, r0
; CHECK-NEXT: xor r0, r17, r18
; CHECK-NEXT: or r0, r0, r21
; CHECK-NEXT: jnz r0, #.LBB1_1
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/Conv2D-red-swp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -224,16 +224,16 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; DCL-LABEL: conv2d.loop.nest:
; DCL: .p2align 4
; DCL-NEXT: // %bb.0: // %newFuncRoot
; DCL-NEXT: paddb [sp], #192; nopa ; nops ; nopxm ; nopv
; DCL-NEXT: st p6, [sp, #-188]; nopx // 4-byte Folded Spill
; DCL-NEXT: paddb [sp], #192
; DCL-NEXT: st p6, [sp, #-188] // 4-byte Folded Spill
; DCL-NEXT: mov p6, sp
; DCL-NEXT: paddb [p6], #-292
; DCL-NEXT: lda m0, [p6, #0]; mov p6, sp
; DCL-NEXT: paddb [p6], #-296
; DCL-NEXT: lda dj0, [p6, #0]; mov p6, sp
; DCL-NEXT: paddb [p6], #-300
; DCL-NEXT: lda dn0, [p6, #0]; mov p6, sp
; DCL-NEXT: paddb [p6], #-204; mov r29, r16
; DCL-NEXT: paddb [p6], #-204
; DCL-NEXT: lda m0, [p6, #0]; mov p6, sp
; DCL-NEXT: paddb [p6], #-208; mov s0, r0
; DCL-NEXT: lda dj0, [p6, #0]; mov p6, sp
Expand Down Expand Up @@ -314,7 +314,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; DCL-NEXT: vldb wl1, [p1], #32; add r0, r10, #33; mov r10, p0
; DCL-NEXT: vldb wh1, [p1], #32; add r1, r5, #-1; vshuffle x7, x4, x2, r2
; DCL-NEXT: vldb wl10, [p1], #32; add r1, r1, #-1; vshuffle x9, x7, x0, r8
; DCL-NEXT: vldb wh10, [p1], #32; and r16, r10, r9
; DCL-NEXT: vldb wh10, [p1], #32; and r10, r10, r9
; DCL-NEXT: .p2align 4
; DCL-NEXT: .LBB0_2: // %inner.loop
; DCL-NEXT: // Parent Loop BB0_1 Depth=1
Expand All @@ -327,8 +327,8 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; DCL-NEXT: vldb.3d wh3, [p0], d0; vshuffle x7, x4, x2, r2; vmac cm4, cm4, x7, x8, r4 // Delay Slot 5
; DCL-NEXT: vldb wl1, [p1], #32; vshuffle x9, x7, x0, r8; vmac cm2, cm2, x9, x6, r4 // Delay Slot 4
; DCL-NEXT: vldb wh1, [p1], #32; vmov x6, x1; vmac cm6, cm6, x9, x8, r4 // Delay Slot 3
; DCL-NEXT: vldb wl10, [p1], #32; add r0, r16, #33; mov r10, p0; vmac cm3, cm3, x11, x6, r4 // Delay Slot 2
; DCL-NEXT: vldb wh10, [p1], #32; and r16, r10, r9; vmov x8, x10; vmac cm7, cm7, x11, x8, r4 // Delay Slot 1
; DCL-NEXT: vldb wl10, [p1], #32; add r0, r10, #33; mov r10, p0; vmac cm3, cm3, x11, x6, r4 // Delay Slot 2
; DCL-NEXT: vldb wh10, [p1], #32; and r10, r10, r9; vmov x8, x10; vmac cm7, cm7, x11, x8, r4 // Delay Slot 1
; DCL-NEXT: // %bb.3: // in Loop: Header=BB0_1 Depth=1
; DCL-NEXT: nopa ; nopx ; vmov x11, x0
; DCL-NEXT: vshuffle x0, x4, x2, r3
Expand Down Expand Up @@ -375,28 +375,28 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; DCL-NEXT: padda.3d [p2], d3; st m7, [sp, #-96] // 4-byte Folded Spill Delay Slot 2
; DCL-NEXT: padda.3d [p1], d2; mov r28, dc7 // Delay Slot 1
; DCL-NEXT: // %bb.4: // %exitStub
; DCL-NEXT: lda p7, [sp, #-192]; nopxm // 4-byte Folded Reload
; DCL-NEXT: lda p7, [sp, #-192]; nopb ; nopxm // 4-byte Folded Reload
; DCL-NEXT: lda p6, [sp, #-188] // 4-byte Folded Reload
; DCL-NEXT: ret lr
; DCL-NEXT: nop // Delay Slot 5
; DCL-NEXT: nop // Delay Slot 4
; DCL-NEXT: nop // Delay Slot 3
; DCL-NEXT: nop // Delay Slot 2
; DCL-NEXT: paddb [sp], #-192; mov r16, r29 // Delay Slot 1
; DCL-NEXT: paddb [sp], #-192 // Delay Slot 1
;
; ZOL-LABEL: conv2d.loop.nest:
; ZOL: .p2align 4
; ZOL-NEXT: // %bb.0: // %newFuncRoot
; ZOL-NEXT: paddb [sp], #192; nopa ; nops ; nopxm ; nopv
; ZOL-NEXT: st p6, [sp, #-188]; nopx // 4-byte Folded Spill
; ZOL-NEXT: paddb [sp], #192
; ZOL-NEXT: st p6, [sp, #-188] // 4-byte Folded Spill
; ZOL-NEXT: mov p6, sp
; ZOL-NEXT: paddb [p6], #-292
; ZOL-NEXT: lda m0, [p6, #0]; mov p6, sp
; ZOL-NEXT: paddb [p6], #-296
; ZOL-NEXT: lda dj0, [p6, #0]; mov p6, sp
; ZOL-NEXT: paddb [p6], #-300
; ZOL-NEXT: lda dn0, [p6, #0]; mov p6, sp
; ZOL-NEXT: paddb [p6], #-204; mov r28, r16
; ZOL-NEXT: paddb [p6], #-204
; ZOL-NEXT: lda m0, [p6, #0]; mov p6, sp
; ZOL-NEXT: paddb [p6], #-208; mov s0, r0
; ZOL-NEXT: lda dj0, [p6, #0]; mov p6, sp
Expand Down Expand Up @@ -478,7 +478,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ZOL-NEXT: vldb wh1, [p1], #32; nopa ; nops ; add r0, r1, #33; mov r1, p0; nopv
; ZOL-NEXT: vldb wl10, [p1], #32; nopa ; nops ; nopx ; vshuffle x7, x4, x2, r2; nopv
; ZOL-NEXT: vldb wh10, [p1], #32; nopa ; nops ; nopx ; vshuffle x9, x7, x0, r8; nopv
; ZOL-NEXT: nopb ; nopa ; nops ; and r16, r1, r9; nopm ; nopv
; ZOL-NEXT: nopb ; nopa ; nops ; and r1, r1, r9; nopm ; nopv
; ZOL-NEXT: .p2align 4
; ZOL-NEXT: .LBB0_2: // %inner.loop
; ZOL-NEXT: // Parent Loop BB0_1 Depth=1
Expand All @@ -490,9 +490,9 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ZOL-NEXT: vldb.3d wh3, [p0], d0; vshuffle x7, x4, x2, r2; vmac cm4, cm4, x7, x8, r4
; ZOL-NEXT: vldb wl1, [p1], #32; vshuffle x9, x7, x0, r8; vmac cm2, cm2, x9, x6, r4
; ZOL-NEXT: vldb wh1, [p1], #32; vmov x6, x1; vmac cm6, cm6, x9, x8, r4
; ZOL-NEXT: vldb wl10, [p1], #32; add r0, r16, #33; mov r1, p0; vmac cm3, cm3, x11, x6, r4
; ZOL-NEXT: vldb wl10, [p1], #32; add r0, r1, #33; mov r1, p0; vmac cm3, cm3, x11, x6, r4
; ZOL-NEXT: .L_LEnd0:
; ZOL-NEXT: vldb wh10, [p1], #32; nopa ; nops ; and r16, r1, r9; vmov x8, x10; vmac cm7, cm7, x11, x8, r4
; ZOL-NEXT: vldb wh10, [p1], #32; nopa ; nops ; and r1, r1, r9; vmov x8, x10; vmac cm7, cm7, x11, x8, r4
; ZOL-NEXT: // %bb.3: // in Loop: Header=BB0_1 Depth=1
; ZOL-NEXT: nopa ; nopx ; vmov x11, x0
; ZOL-NEXT: vshuffle x0, x4, x2, r3
Expand Down Expand Up @@ -539,14 +539,14 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ZOL-NEXT: padda.3d [p2], d3; st m7, [sp, #-96] // 4-byte Folded Spill Delay Slot 2
; ZOL-NEXT: padda.3d [p1], d2; mov r27, dc7 // Delay Slot 1
; ZOL-NEXT: // %bb.4: // %exitStub
; ZOL-NEXT: lda p7, [sp, #-192]; nopxm // 4-byte Folded Reload
; ZOL-NEXT: lda p7, [sp, #-192]; nopb ; nopxm // 4-byte Folded Reload
; ZOL-NEXT: lda p6, [sp, #-188] // 4-byte Folded Reload
; ZOL-NEXT: ret lr
; ZOL-NEXT: nop // Delay Slot 5
; ZOL-NEXT: nop // Delay Slot 4
; ZOL-NEXT: nop // Delay Slot 3
; ZOL-NEXT: nop // Delay Slot 2
; ZOL-NEXT: paddb [sp], #-192; mov r16, r28 // Delay Slot 1
; ZOL-NEXT: paddb [sp], #-192 // Delay Slot 1
newFuncRoot:
br label %outer.loop.header

Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AIE/aie2/end-to-end/Conv2D-red.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-LABEL: conv2d.loop.nest:
; ASM: .p2align 4
; ASM-NEXT: // %bb.0: // %newFuncRoot
; ASM-NEXT: paddb [sp], #32
; ASM-NEXT: nopa ; paddb [sp], #32; nopx
; ASM-NEXT: st p6, [sp, #-28] // 4-byte Folded Spill
; ASM-NEXT: mov p6, sp
; ASM-NEXT: paddb [p6], #-132
Expand All @@ -54,9 +54,9 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: lda dj4, [p6, #0]; mov p6, sp
; ASM-NEXT: paddb [p6], #-56
; ASM-NEXT: lda dn0, [p6, #0]; mov p6, sp
; ASM-NEXT: paddb [p6], #-60; mov r29, r16
; ASM-NEXT: paddb [p6], #-60
; ASM-NEXT: lda dn4, [p6, #0]; mov p6, sp
; ASM-NEXT: paddb [p6], #-68; mov r30, r17
; ASM-NEXT: paddb [p6], #-68
; ASM-NEXT: lda r10, [p6, #0]; mov p6, sp
; ASM-NEXT: paddb [p6], #-72; mov s0, r0
; ASM-NEXT: lda dj1, [p6, #0]; mov p6, sp
Expand Down Expand Up @@ -109,7 +109,7 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vlda.ups.s32.s16 bmh6, s0, [p2, #32]
; ASM-NEXT: vlda.ups.s32.s16 bml6, s0, [p2], m1; mov r0, p0
; ASM-NEXT: vlda.ups.s32.s16 bmh7, s0, [p2, #32]; and r0, r0, r9
; ASM-NEXT: vlda.ups.s32.s16 bml7, s0, [p2, #0]; add r17, r0, #33; mov r0, r5
; ASM-NEXT: vlda.ups.s32.s16 bml7, s0, [p2, #0]; add r1, r0, #33; mov r0, r5
; ASM-NEXT: .p2align 4
; ASM-NEXT: .LBB0_2: // %inner.loop
; ASM-NEXT: // Parent Loop BB0_1 Depth=1
Expand All @@ -123,8 +123,8 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vldb wh10, [p1], #32
; ASM-NEXT: vldb wl7, [p1], #32
; ASM-NEXT: vldb wh7, [p1], #32
; ASM-NEXT: vshift.align x4, x4, s1, x6, r17
; ASM-NEXT: vshift.align x2, x2, s1, x8, r17
; ASM-NEXT: vshift.align x4, x4, s1, x6, r1
; ASM-NEXT: vshift.align x2, x2, s1, x8, r1
; ASM-NEXT: vshuffle x9, x4, x2, r2
; ASM-NEXT: vshuffle x3, x4, x2, r3
; ASM-NEXT: vmac cm0, cm0, x9, x10, r4
Expand All @@ -133,8 +133,8 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: vshuffle x5, x3, x0, r8; vmac cm6, cm6, x3, x7, r4 // Delay Slot 5
; ASM-NEXT: vmac cm1, cm1, x1, x10, r4 // Delay Slot 4
; ASM-NEXT: mov r1, p0; vmac cm3, cm3, x5, x10, r4 // Delay Slot 3
; ASM-NEXT: and r16, r1, r9; vmac cm5, cm5, x1, x7, r4 // Delay Slot 2
; ASM-NEXT: add r17, r16, #33; vmac cm7, cm7, x5, x7, r4 // Delay Slot 1
; ASM-NEXT: and r1, r1, r9; vmac cm5, cm5, x1, x7, r4 // Delay Slot 2
; ASM-NEXT: add r1, r1, #33; vmac cm7, cm7, x5, x7, r4 // Delay Slot 1
; ASM-NEXT: // %bb.3: // %outer.loop.latch
; ASM-NEXT: // in Loop: Header=BB0_1 Depth=1
; ASM-NEXT: nopa ; nopb ; nopx ; mov s3, r6; vst.srs.s16.s32 bmh0, s2, [p3, #32]
Expand All @@ -161,14 +161,14 @@ define dso_local void @conv2d.loop.nest(ptr %add.ptr6.i51, ptr %add.ptr5, ptr %c
; ASM-NEXT: padda.3d [p1], d2; paddb [p2], m1; mov m3, r14 // Delay Slot 2
; ASM-NEXT: padda.3d [p2], d3; mov r25, dc5 // Delay Slot 1
; ASM-NEXT: // %bb.4: // %exitStub
; ASM-NEXT: lda p7, [sp, #-32]; nopb ; nopx // 4-byte Folded Reload
; ASM-NEXT: lda p7, [sp, #-32]; nopb ; nopxm // 4-byte Folded Reload
; ASM-NEXT: lda p6, [sp, #-28] // 4-byte Folded Reload
; ASM-NEXT: ret lr
; ASM-NEXT: nop // Delay Slot 5
; ASM-NEXT: nop // Delay Slot 4
; ASM-NEXT: nop // Delay Slot 3
; ASM-NEXT: mov r17, r30 // Delay Slot 2
; ASM-NEXT: paddb [sp], #-32; mov r16, r29 // Delay Slot 1
; ASM-NEXT: nop // Delay Slot 2
; ASM-NEXT: paddb [sp], #-32 // Delay Slot 1
newFuncRoot:
br label %outer.loop.header

Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ define void @nested(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r7, r6, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r7
; CHECK-NEXT: lda r8, [p3, dj0]
; CHECK-NEXT: lda r7, [p3, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r5, r5, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r6, r6, #1 // Delay Slot 3
; CHECK-NEXT: add r2, r2, r8 // Delay Slot 2
; CHECK-NEXT: add r2, r2, r7 // Delay Slot 2
; CHECK-NEXT: st r2, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.cond3.for.cond.cleanup5_crit_edge
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r6, r5, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r6
; CHECK-NEXT: lda r7, [p1, dj0]
; CHECK-NEXT: lda r6, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r5, r5, #1 // Delay Slot 3
; CHECK-NEXT: add r3, r3, r7 // Delay Slot 2
; CHECK-NEXT: add r3, r3, r6 // Delay Slot 2
; CHECK-NEXT: st r3, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.body6.lr.ph
Expand All @@ -43,14 +43,14 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r5, [p1, dj0]
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r1, r1, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r0, r0, r5 // Delay Slot 2
; CHECK-NEXT: add r0, r0, r4 // Delay Slot 2
; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.4: // %for.cond.cleanup5
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ define void @simple(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r5, [p1, dj0]
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r1, r1, r5 // Delay Slot 2
; CHECK-NEXT: add r1, r1, r4 // Delay Slot 2
; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
Expand Down
Loading

0 comments on commit 4a65ca1

Please sign in to comment.