@@ -479,21 +479,28 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
479
479
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
480
480
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
481
481
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
482
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
483
- ; GFX9-O0-NEXT: s_nop 0
484
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
485
482
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
486
483
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
487
484
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
488
485
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
489
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
486
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
487
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
488
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
489
+ ; GFX9-O0-NEXT: s_nop 0
490
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
491
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
492
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
493
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
490
494
; GFX9-O0-NEXT: s_nop 0
491
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
492
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
495
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
496
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
497
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
493
498
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
494
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
499
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
500
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
495
501
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
496
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
502
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
503
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
497
504
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
498
505
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
499
506
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -504,6 +511,7 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
504
511
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
505
512
; GFX9-O0-NEXT: s_mov_b32 s14, s13
506
513
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
514
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
507
515
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
508
516
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
509
517
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -1036,10 +1044,10 @@ define i128 @v_sdiv_i128_vv(i128 %lhs, i128 %rhs) {
1036
1044
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
1037
1045
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
1038
1046
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
1039
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1040
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1041
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1042
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1047
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
1048
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
1049
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
1050
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
1043
1051
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
1044
1052
; GFX9-O0-NEXT: s_mov_b32 s5, s6
1045
1053
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
@@ -2654,21 +2662,28 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2654
2662
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2655
2663
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
2656
2664
; GFX9-O0-NEXT: v_mov_b32_e32 v5, v8
2657
- ; GFX9-O0-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2658
- ; GFX9-O0-NEXT: s_nop 0
2659
- ; GFX9-O0-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2660
2665
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2661
2666
; GFX9-O0-NEXT: ; implicit-def: $sgpr8
2662
2667
; GFX9-O0-NEXT: ; kill: def $vgpr7 killed $vgpr7 def $vgpr7_vgpr8 killed $exec
2663
2668
; GFX9-O0-NEXT: v_mov_b32_e32 v8, v6
2664
- ; GFX9-O0-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2669
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v8
2670
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v7
2671
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
2672
+ ; GFX9-O0-NEXT: s_nop 0
2673
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
2674
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v10, v5
2675
+ ; GFX9-O0-NEXT: v_mov_b32_e32 v9, v4
2676
+ ; GFX9-O0-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
2665
2677
; GFX9-O0-NEXT: s_nop 0
2666
- ; GFX9-O0-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2667
- ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[6:7]
2678
+ ; GFX9-O0-NEXT: buffer_store_dword v10, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
2679
+ ; GFX9-O0-NEXT: s_mov_b64 s[8:9], s[6:7]
2680
+ ; GFX9-O0-NEXT: v_cmp_eq_u64_e64 s[8:9], v[7:8], s[8:9]
2668
2681
; GFX9-O0-NEXT: s_mov_b64 s[12:13], 0x7f
2669
- ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[12:13]
2682
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[12:13]
2683
+ ; GFX9-O0-NEXT: v_cmp_gt_u64_e64 s[14:15], v[4:5], s[14:15]
2670
2684
; GFX9-O0-NEXT: v_cndmask_b32_e64 v9, 0, 1, s[14:15]
2671
- ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[6:7]
2685
+ ; GFX9-O0-NEXT: s_mov_b64 s[14:15], s[6:7]
2686
+ ; GFX9-O0-NEXT: v_cmp_ne_u64_e64 s[14:15], v[7:8], s[14:15]
2672
2687
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[14:15]
2673
2688
; GFX9-O0-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[8:9]
2674
2689
; GFX9-O0-NEXT: v_and_b32_e64 v6, 1, v6
@@ -2679,6 +2694,7 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
2679
2694
; GFX9-O0-NEXT: v_mov_b32_e32 v6, v5
2680
2695
; GFX9-O0-NEXT: s_mov_b32 s14, s13
2681
2696
; GFX9-O0-NEXT: v_xor_b32_e64 v6, v6, s14
2697
+ ; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 killed $vgpr4_vgpr5 killed $exec
2682
2698
; GFX9-O0-NEXT: ; kill: def $sgpr12 killed $sgpr12 killed $sgpr12_sgpr13
2683
2699
; GFX9-O0-NEXT: v_xor_b32_e64 v4, v4, s12
2684
2700
; GFX9-O0-NEXT: ; kill: def $vgpr4 killed $vgpr4 def $vgpr4_vgpr5 killed $exec
@@ -3211,10 +3227,10 @@ define i128 @v_udiv_i128_vv(i128 %lhs, i128 %rhs) {
3211
3227
; GFX9-O0-NEXT: buffer_load_dword v10, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
3212
3228
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
3213
3229
; GFX9-O0-NEXT: buffer_load_dword v12, off, s[0:3], s32 offset:48 ; 4-byte Folded Reload
3214
- ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3215
- ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3216
- ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3217
- ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3230
+ ; GFX9-O0-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
3231
+ ; GFX9-O0-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
3232
+ ; GFX9-O0-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
3233
+ ; GFX9-O0-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
3218
3234
; GFX9-O0-NEXT: s_mov_b64 s[6:7], 1
3219
3235
; GFX9-O0-NEXT: s_mov_b32 s5, s6
3220
3236
; GFX9-O0-NEXT: s_waitcnt vmcnt(1)
0 commit comments