@@ -49,7 +49,9 @@ define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
4949; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
5050; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
5151; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
52+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
5253; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1
54+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
5355; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1
5456; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
5557; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -59,7 +61,9 @@ define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
5961; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
6062; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
6163; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
64+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
6265; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1
66+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
6367; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2
6468; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
6569; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -112,7 +116,9 @@ define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
112116; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
113117; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
114118; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
119+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
115120; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1 offset:8
121+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
116122; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1 offset:16
117123; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
118124; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -122,7 +128,9 @@ define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
122128; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
123129; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
124130; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
131+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
125132; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1 offset:8
133+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
126134; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
127135; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
128136; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -177,7 +185,9 @@ define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
177185; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
178186; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
179187; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
188+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
180189; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
190+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
181191; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
182192; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
183193; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -187,7 +197,9 @@ define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
187197; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
188198; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
189199; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
200+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
190201; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
202+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
191203; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
192204; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
193205; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -240,7 +252,9 @@ define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val)
240252; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
241253; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
242254; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
255+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
243256; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
257+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
244258; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
245259; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
246260; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -250,7 +264,9 @@ define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val)
250264; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
251265; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
252266; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
267+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
253268; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
269+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
254270; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
255271; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
256272; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -288,6 +304,7 @@ define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
288304; GFX1250: ; %bb.0:
289305; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
290306; GFX1250-NEXT: s_wait_kmcnt 0x0
307+ ; GFX1250-NEXT: s_wait_xcnt 0x0
291308; GFX1250-NEXT: ds_store_b32 v0, v1
292309; GFX1250-NEXT: s_wait_dscnt 0x0
293310; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -322,6 +339,7 @@ define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val)
322339; GFX1250: ; %bb.0:
323340; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
324341; GFX1250-NEXT: s_wait_kmcnt 0x0
342+ ; GFX1250-NEXT: s_wait_xcnt 0x0
325343; GFX1250-NEXT: ds_store_b32 v0, v1 offset:64
326344; GFX1250-NEXT: s_wait_dscnt 0x0
327345; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -358,6 +376,7 @@ define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
358376; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
359377; GFX1250-NEXT: s_wait_kmcnt 0x0
360378; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
379+ ; GFX1250-NEXT: s_wait_xcnt 0x0
361380; GFX1250-NEXT: ds_store_b64 v0, v[2:3]
362381; GFX1250-NEXT: s_wait_dscnt 0x0
363382; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -393,6 +412,7 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val)
393412; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
394413; GFX1250-NEXT: s_wait_kmcnt 0x0
395414; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
415+ ; GFX1250-NEXT: s_wait_xcnt 0x0
396416; GFX1250-NEXT: ds_store_b64 v0, v[2:3] offset:128
397417; GFX1250-NEXT: s_wait_dscnt 0x0
398418; GFX1250-NEXT: s_set_pc_i64 s[30:31]
@@ -444,7 +464,9 @@ define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
444464; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
445465; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
446466; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
467+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
447468; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
469+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
448470; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
449471; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
450472; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -454,7 +476,9 @@ define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
454476; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
455477; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
456478; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
479+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
457480; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
481+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
458482; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
459483; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
460484; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -509,7 +533,9 @@ define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.v
509533; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
510534; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
511535; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
536+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
512537; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
538+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
513539; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
514540; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
515541; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -519,7 +545,9 @@ define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.v
519545; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
520546; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
521547; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
548+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
522549; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
550+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
523551; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
524552; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
525553; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -575,7 +603,9 @@ define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
575603; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
576604; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
577605; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
606+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
578607; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
608+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
579609; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
580610; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
581611; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -585,7 +615,9 @@ define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
585615; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
586616; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
587617; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
618+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
588619; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
620+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
589621; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
590622; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
591623; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
@@ -640,7 +672,9 @@ define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.
640672; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
641673; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
642674; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
675+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
643676; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
677+ ; GFX1250-TRUE16-NEXT: s_wait_xcnt 0x0
644678; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
645679; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
646680; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
@@ -650,7 +684,9 @@ define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.
650684; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
651685; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
652686; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
687+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
653688; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
689+ ; GFX1250-FAKE16-NEXT: s_wait_xcnt 0x0
654690; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
655691; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
656692; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
0 commit comments