Skip to content

Commit ceb587a

Browse files
authored
[AMDGPU] Fix crash in allowsMisalignedMemoryAccesses with i1 (llvm#105794)
1 parent caa844e commit ceb587a

File tree

3 files changed

+35 -1 lines changed

3 files changed

+35 -1 lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1695,7 +1695,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
16951695
if (!Subtarget->hasUnalignedDSAccessEnabled() && Alignment < Align(4))
16961696
return false;
16971697

1698-
Align RequiredAlignment(PowerOf2Ceil(Size/8)); // Natural alignment.
1698+
Align RequiredAlignment(
1699+
PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
16991700
if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
17001701
Alignment < RequiredAlignment)
17011702
return false;

llvm/test/CodeGen/AMDGPU/load-local-i1.ll

+13
Original file line number | Diff line number | Diff line change
@@ -462,4 +462,17 @@ define amdgpu_kernel void @local_sextload_v64i1_to_v64i64(ptr addrspace(3) %out,
462462
ret void
463463
}
464464

465+
; FUNC-LABEL: {{^}}local_load_i1_misaligned:
466+
; SICIVI: s_mov_b32 m0
467+
; GFX9-NOT: m0
468+
define amdgpu_kernel void @local_load_i1_misaligned(ptr addrspace(3) %in, ptr addrspace (3) %out) #0 {
469+
%in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
470+
%load.1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
471+
%load.2 = load <8 x i1>, ptr addrspace(3) %in, align 1
472+
%out.gep.1 = getelementptr i1, ptr addrspace(3) %out, i32 16
473+
store <16 x i1> %load.1, ptr addrspace(3) %out
474+
store <8 x i1> %load.2, ptr addrspace(3) %out.gep.1
475+
ret void
476+
}
477+
465478
attributes #0 = { nounwind }
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,20 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa --mcpu=gfx940 -passes=load-store-vectorizer -S -o - %s | FileCheck %s
3+
4+
; Don't crash when checking for misaligned accesses with sub-byte size.
5+
6+
define void @misaligned_access_i1(ptr addrspace(3) %in) #0 {
7+
; CHECK-LABEL: define void @misaligned_access_i1(
8+
; CHECK-SAME: ptr addrspace(3) [[IN:%.*]]) #[[ATTR0:[0-9]+]] {
9+
; CHECK-NEXT: [[IN_GEP_1:%.*]] = getelementptr i1, ptr addrspace(3) [[IN]], i32 1
10+
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i1>, ptr addrspace(3) [[IN_GEP_1]], align 4
11+
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i1>, ptr addrspace(3) [[IN]], align 1
12+
; CHECK-NEXT: ret void
13+
;
14+
%in.gep.1 = getelementptr i1, ptr addrspace(3) %in, i32 1
15+
16+
%1 = load <16 x i1>, ptr addrspace(3) %in.gep.1, align 4
17+
%2 = load <8 x i1>, ptr addrspace(3) %in, align 1
18+
ret void
19+
}
20+

0 commit comments

Comments
 (0)