
Commit 73f1cd4

[AIE2] NFC: Add baseline test with critical CM reg pressure
In a follow-up commit, the pre-RA machine scheduler (premisched) will reorder the instructions to reduce this pressure and avoid spills during register allocation (RA).
1 parent 8dc4a1b commit 73f1cd4
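Why reordering alone can avoid spills is easy to see with a toy live-range model. The sketch below is illustrative only and is not the AIE2 premisched; the instruction tuples, value names, and the pressure metric are all invented for the example. It counts how many values are simultaneously live for two schedules of the same loads, adds, and stores: one with every load hoisted to the top of the block, as in the baseline test below, and one with each load sunk next to its use.

# Toy pressure model (illustrative; not LLVM code). An instruction is
# (name, defs, uses); a value is live from its def to its last use, and
# "pressure" is the largest number of simultaneously live values.
def max_pressure(instrs):
    last_use = {}
    for i, (_, _, uses) in enumerate(instrs):
        for u in uses:
            last_use[u] = i
    live, peak = set(), 0
    for i, (_, defs, uses) in enumerate(instrs):
        live |= set(defs)
        peak = max(peak, len(live))
        live -= {u for u in uses if last_use.get(u) == i}
    return peak

# All loads hoisted to the top: every loaded value stays live until its add.
hoisted = (
    [(f"ld{i}", [f"a{i}"], []) for i in range(4)]
    + [(f"add{i}", [f"s{i}"], [f"a{i}"]) for i in range(4)]
    + [(f"st{i}", [], [f"s{i}"]) for i in range(4)]
)
# Each load sunk next to its add: a loaded value dies almost immediately.
sunk = [ins for i in range(4) for ins in (
    (f"ld{i}", [f"a{i}"], []),
    (f"add{i}", [f"s{i}"], [f"a{i}"]),
    (f"st{i}", [], [f"s{i}"]),
)]
print(max_pressure(hoisted), max_pressure(sunk))  # 5 vs. 2 live values

The real premisched works on MachineInstrs and real register classes rather than on this toy model; the point is only that the baseline ordering below keeps many wide acc1024 (CM) values live at once.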

1 file changed: 125 additions, 0 deletions
@@ -0,0 +1,125 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -march=aie2 -run-pass=machine-scheduler %s -o - | FileCheck %s


# This represents the innermost loop of Add2D after SW pipelining.
# We should see most of the VLDA.UPS instructions move down in the loop
# BB to reduce the reg pressure and avoid spills. They can later be moved back
# up by the post-RA scheduler. This should also make the 4 acc1024 COPY
# instructions coalesce-able.
---
name: add2d_innermost
tracksRegLiveness: true
body: |
  ; CHECK-LABEL: name: add2d_innermost
  ; CHECK: bb.0.entry:
  ; CHECK-NEXT: successors: %bb.1(0x80000000)
  ; CHECK-NEXT: liveins: $p0, $m0, $cm0, $cm1, $s0, $d1, $x0, $r0, $d0_3d
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:em = COPY $m0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:acc1024 = COPY $cm0
  ; CHECK-NEXT: [[COPY9:%[0-9]+]]:ep_as_32bit = COPY $p0
  ; CHECK-NEXT: [[COPY10:%[0-9]+]]:ep_as_32bit = COPY $p0
  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ep_as_32bit = COPY $p0
  ; CHECK-NEXT: [[COPY12:%[0-9]+]]:eds = COPY $d0_3d
  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:er = COPY $r0
  ; CHECK-NEXT: [[COPY14:%[0-9]+]]:er = COPY $r0
  ; CHECK-NEXT: [[COPY15:%[0-9]+]]:mss = COPY $s0
  ; CHECK-NEXT: [[COPY16:%[0-9]+]]:mss = COPY $s0
  ; CHECK-NEXT: PseudoJ_jump_imm %bb.1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.1:
  ; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY17:%[0-9]+]]:acc1024 = COPY [[COPY4]]
  ; CHECK-NEXT: [[VADD:%[0-9]+]]:acc1024 = VADD [[COPY5]], [[COPY17]], [[COPY14]]
  ; CHECK-NEXT: [[COPY18:%[0-9]+]]:acc1024 = COPY [[COPY3]]
  ; CHECK-NEXT: [[VADD1:%[0-9]+]]:acc1024 = VADD [[COPY6]], [[COPY18]], [[COPY14]]
  ; CHECK-NEXT: [[COPY19:%[0-9]+]]:acc1024 = COPY [[COPY2]]
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc1024, [[COPY9:%[0-9]+]]:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm [[COPY15]], [[COPY9]], [[COPY]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
  ; CHECK-NEXT: [[VADD2:%[0-9]+]]:acc1024 = VADD [[COPY7]], [[COPY19]], [[COPY14]]
  ; CHECK-NEXT: [[COPY20:%[0-9]+]]:acc1024 = COPY [[COPY1]]
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc1024, [[COPY9:%[0-9]+]]:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm [[COPY15]], [[COPY9]], [[COPY]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
  ; CHECK-NEXT: [[VADD3:%[0-9]+]]:acc1024 = VADD [[COPY8]], [[COPY20]], [[COPY14]]
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc1024, [[COPY9:%[0-9]+]]:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm [[COPY15]], [[COPY9]], [[COPY]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024, [[COPY9:%[0-9]+]]:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm [[COPY15]], [[COPY9]], [[COPY]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm [[COPY11]], 32, [[VADD]], [[COPY16]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc1024, [[COPY10:%[0-9]+]]:ep_as_32bit, [[COPY12:%[0-9]+]].sub_dim_count:eds, [[COPY12:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 [[COPY15]], [[COPY10]], [[COPY12]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm [[COPY11]], 32, [[VADD1]], [[COPY16]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
  ; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc1024, [[COPY10:%[0-9]+]]:ep_as_32bit, [[COPY12:%[0-9]+]].sub_dim_count:eds, [[COPY12:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 [[COPY15]], [[COPY10]], [[COPY12]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm [[COPY11]], 32, [[VADD2]], [[COPY16]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
  ; CHECK-NEXT: [[COPY7:%[0-9]+]]:acc1024, [[COPY10:%[0-9]+]]:ep_as_32bit, [[COPY12:%[0-9]+]].sub_dim_count:eds, [[COPY12:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 [[COPY15]], [[COPY10]], [[COPY12]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
  ; CHECK-NEXT: [[COPY11:%[0-9]+]]:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm [[COPY11]], 32, [[VADD3]], [[COPY16]], implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
  ; CHECK-NEXT: [[COPY8:%[0-9]+]]:acc1024, [[COPY10:%[0-9]+]]:ep_as_32bit, [[COPY12:%[0-9]+]].sub_dim_count:eds, [[COPY12:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 [[COPY15]], [[COPY10]], [[COPY12]], implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
  ; CHECK-NEXT: [[COPY13:%[0-9]+]]:er = ADD_add_r_ri [[COPY13]], -4, implicit-def dead $srcarry
  ; CHECK-NEXT: PseudoJNZ [[COPY13]], %bb.1
  ; CHECK-NEXT: PseudoJ_jump_imm %bb.2
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: bb.2:
  ; CHECK-NEXT: PseudoRET implicit $lr

  bb.0.entry:
    liveins: $p0, $m0, $cm0, $cm1, $s0, $d1, $x0, $r0, $d0_3d

    %367:acc1024 = COPY $cm0
    %365:acc1024 = COPY $cm0
    %363:acc1024 = COPY $cm0
    %361:acc1024 = COPY $cm0
    %362:acc1024 = COPY $cm0
    %364:acc1024 = COPY $cm0
    %366:acc1024 = COPY $cm0
    %368:acc1024 = COPY $cm0
    %248:mss = COPY $s0
    %245:mss = COPY $s0
    %355:ep_as_32bit = COPY $p0
    %358:ep_as_32bit = COPY $p0
    %359:ep_as_32bit = COPY $p0
    %82:em = COPY $m0
    %272:eds = COPY $d0_3d
    %360:er = COPY $r0
    %206:er = COPY $r0
    PseudoJ_jump_imm %bb.1

  bb.1:
    successors: %bb.2(0x04000000), %bb.1(0x7c000000)

    %327:acc1024 = COPY %367
    %325:acc1024 = COPY %365
    %323:acc1024 = COPY %363
    %321:acc1024 = COPY %361
    %361:acc1024, %355:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm %248, %355, %82, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
    %360:er = ADD_add_r_ri %360, -4, implicit-def dead $srcarry
    %281:acc1024 = VADD %362, %321, %206
    %363:acc1024, %355:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm %248, %355, %82, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
    %284:acc1024 = VADD %364, %323, %206
    %365:acc1024, %355:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm %248, %355, %82, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
    %287:acc1024 = VADD %366, %325, %206
    %367:acc1024, %355:ep_as_32bit = VLDA_UPS_S32_D8_ag_pstm_nrm %248, %355, %82, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 64)
    %362:acc1024, %358:ep_as_32bit, %272.sub_dim_count:eds, %272.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 %248, %358, %272, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
    %295:acc1024 = VADD %368, %327, %206
    %364:acc1024, %358:ep_as_32bit, %272.sub_dim_count:eds, %272.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 %248, %358, %272, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
    %366:acc1024, %358:ep_as_32bit, %272.sub_dim_count:eds, %272.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 %248, %358, %272, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
    %359:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm %359, 32, %281, %245, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
    %359:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm %359, 32, %284, %245, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
    %368:acc1024, %358:ep_as_32bit, %272.sub_dim_count:eds, %272.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8 %248, %358, %272, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<32 x s8>) from stack - 32)
    %359:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm %359, 32, %287, %245, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
    %359:ep_as_32bit = VST_SRS_D8_S32_ag_pstm_nrm_imm %359, 32, %295, %245, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<32 x s8>) into stack - 128)
    PseudoJNZ %360, %bb.1
    PseudoJ_jump_imm %bb.2

  bb.2:
    PseudoRET implicit $lr
...
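The test's header comment says this is the software-pipelined innermost loop of Add2D, and the MIR shows where the critical CM pressure comes from: each iteration of bb.1 still holds the eight accumulators loaded in the previous iteration (four of them through the acc1024 COPYs at the top of the block) while the VLDA_UPS and VLDA_3D_UPS instructions already refill all eight for the next one, so two full batches of wide values are live at the same time. A rough scalar rendering of that pipeline shape, written purely as an assumption for illustration (the actual Add2D kernel, its element types, and its addressing modes are not part of this commit), could look like:

from itertools import count

# Hypothetical sketch of a two-stage software pipeline: values loaded in one
# iteration are only added and stored in the next, so the previous batch must
# stay live while the new one is being loaded.
def add2d_innermost(load_a, load_b, store, n):
    a = [load_a() for _ in range(4)]  # prologue batch; the MIR stands this in
    b = [load_b() for _ in range(4)]  # with the COPYs from $cm0 in bb.0.entry
    for _ in range(n):
        sums = [x + y for x, y in zip(a, b)]  # consume last iteration's loads
        a = [load_a() for _ in range(4)]      # refill for the next iteration
        b = [load_b() for _ in range(4)]      # (VLDA_UPS / VLDA_3D_UPS above)
        for s in sums:
            store(s)                          # VST_SRS stores above
    return a, b

# Tiny usage with made-up input streams:
src_a, src_b, out = count(0), count(100), []
add2d_innermost(lambda: next(src_a), lambda: next(src_b), out.append, 2)
print(out)  # the element-wise sums of the first two batches

The follow-up scheduling change described in the commit message should shrink exactly this overlap inside one iteration, which is also what should let the four acc1024 COPY instructions be coalesced away.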
