
Commit 5e25ba8

[GCAtomic] Treat single capability structs like a capability
This allows emitting inline atomics instead of needing a libcall and avoids casting such structs to i128. See https://git.morello-project.org/morello/llvm-project/-/issues/75
Parent: 44fa170
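
For context, a "single capability struct" is a record whose only field is a capability, like the capstruct type in the test below. Before this commit, atomics on such a struct were lowered through the generic __atomic_* libcalls (and triggered the "large atomic operation may incur significant performance penalty" warning); with it, they lower to the same inline instructions as a bare capability. A minimal illustration, mirroring clang/test/CodeGen/cheri/c11-atomic-caps-struct.c (requires a CHERI toolchain):

typedef struct capstruct {
  unsigned __intcap value; /* the only field is a capability */
} capstruct;

capstruct load_it(_Atomic(capstruct) *f) {
  /* Previously lowered to a call to __atomic_load; now an inline
     "load atomic ptr addrspace(200), ... seq_cst", per the test diff below. */
  return __c11_atomic_load(f, __ATOMIC_SEQ_CST);
}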

2 files changed: +90, -35 lines

clang/lib/CodeGen/CGAtomic.cpp

Lines changed: 15 additions & 8 deletions
@@ -148,7 +148,8 @@ bool isAtomicStoreOp(AtomicExpr::AtomicOp Op) {
     }
     UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
         AtomicSizeInBits, C.toBits(lvalue.getAlignment()),
-        AtomicTy->isCHERICapabilityType(CGF.CGM.getContext()));
+        AtomicTy->isCHERICapabilityType(CGF.getContext()) ||
+            AtomicTy->isSingleCapabilityRecord(CGF.getContext()));
   }
 
   QualType getAtomicType() const { return AtomicTy; }
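
Note: isSingleCapabilityRecord is the CHERI fork's own query and its definition is not part of this diff. As a hypothetical sketch of what such a predicate must establish (illustration only, not the fork's implementation; it ignores nested records and C++ base classes):

// Hypothetical sketch, assuming Clang's AST API; the real
// isSingleCapabilityRecord lives elsewhere in the Morello/CHERI fork.
static bool looksLikeSingleCapabilityRecord(const clang::ASTContext &Ctx,
                                            clang::QualType Ty) {
  const auto *RT = Ty->getAs<clang::RecordType>();
  if (!RT)
    return false;
  const clang::RecordDecl *RD = RT->getDecl();
  if (RD->isUnion())
    return false;
  const clang::FieldDecl *OnlyField = nullptr;
  for (const clang::FieldDecl *FD : RD->fields()) {
    if (OnlyField)
      return false; // more than one field
    OnlyField = FD;
  }
  // Exactly one field, and that field must itself be a capability.
  return OnlyField && OnlyField->getType()->isCHERICapabilityType(Ctx);
}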
@@ -549,7 +550,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
   bool PostOpMinMax = false;
   unsigned PostOp = 0;
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
-  bool IsCheriCap = AtomicTy->isCHERICapabilityType(CGF.CGM.getContext());
+  bool IsCheriCap = AtomicTy->isCHERICapabilityType(CGF.getContext()) ||
+                    AtomicTy->isSingleCapabilityRecord(CGF.getContext());
 
   switch (E->getOp()) {
   case AtomicExpr::AO__c11_atomic_init:
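
With IsCheriCap now also true for single-capability records, EmitAtomicOp emits capability-typed load atomic / store atomic / atomicrmw xchg / cmpxchg instructions for them rather than falling back to the __atomic_* libcalls; the updated test checks below show exactly this.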
@@ -820,12 +822,14 @@ static void
 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
                   SourceLocation Loc, CharUnits SizeInChars) {
+  bool IsCapTy = ValTy->isCHERICapabilityType(CGF.getContext()) ||
+                 ValTy->isSingleCapabilityRecord(CGF.getContext());
   if (UseOptimizedLibcall) {
     // Load value and pass it to the function directly.
     CharUnits Align = CGF.getContext().getTypeAlignInChars(ValTy);
     int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
     llvm::Type *ITy;
-    if (ValTy->isCHERICapabilityType(CGF.getContext())) {
+    if (IsCapTy) {
       ValTy = CGF.getContext().getPointerType(CGF.getContext().VoidTy,
                                               PIK_Capability);
       ITy = CGF.Int8CheriCapTy;
@@ -845,7 +849,7 @@ AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
   } else {
     // Non-optimized functions always take a reference.
     // NB: Capabilities must be passed directly to the optimized libcall
-    assert(!ValTy->isCHERICapabilityType(CGF.getContext()) &&
+    assert(!IsCapTy &&
            "Capabilities should not be passed to the generic libcall");
     Args.add(RValue::get(Val), CGF.getContext().VoidPtrTy);
   }
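
The hoisted IsCapTy flag routes single-capability structs down the same path as bare capabilities in AddDirectArgument: the value is loaded as CGF.Int8CheriCapTy and passed by value to the optimized libcall. The retargeted assert in the else branch documents the invariant that neither form is ever passed by reference to the generic libcall.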
@@ -874,7 +878,8 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   uint64_t Size = TInfo.Width.getQuantity();
   unsigned MaxInlineWidthInBits = getTarget().getMaxAtomicInlineWidth();
 
-  bool IsCheriCap = AtomicTy->isCHERICapabilityType(CGM.getContext());
+  bool IsCheriCap = AtomicTy->isCHERICapabilityType(CGM.getContext()) ||
+                    AtomicTy->isSingleCapabilityRecord(CGM.getContext());
   bool Oversized = (!IsCheriCap &&
                     getContext().toBits(TInfo.Width) > MaxInlineWidthInBits) ||
                    (IsCheriCap && MaxInlineWidthInBits == 0);
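
The asymmetry in the Oversized predicate is deliberate: MaxAtomicInlineWidth is measured in integer bits, and a capability has no lossless integer representation (casting it to i128 would drop the tag, which is precisely what this commit avoids), so capability-typed atomics are never compared against that width. They count as oversized only when the target reports no inline atomic support at all (MaxInlineWidthInBits == 0).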
@@ -1523,14 +1528,16 @@ RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
 
 Address AtomicInfo::castToAtomicIntPointer(Address addr) const {
   llvm::Type *ty;
-  if (AtomicTy->isCHERICapabilityType(CGF.getContext())) {
+  if (AtomicTy->isCHERICapabilityType(CGF.getContext()) ||
+      AtomicTy->isSingleCapabilityRecord(CGF.getContext())) {
     // If capability atomics are natively supported the instruction expects
     // a capability type. We also pass capabilities directly to the atomic
     // libcalls (i.e. always use optimized ones) since this is required to
     // support the RMW operations and special-casing the load/store/xchg to
     // use the generic libcalls (with mutex+memcpy) adds unnecessary complexity.
-    if (!UseLibcall) {
-      // If we aren't using a libcall there is no need to cast to i8*
+    if (!UseLibcall && !AtomicTy->isSingleCapabilityRecord(CGF.getContext())) {
+      // If we aren't using a libcall and aren't using a single-capability
+      // struct, there is no need to cast to i8*
       return addr.withElementType(getAtomicAddress().getElementType());
     }
     ty = CGF.CGM.Int8CheriCapTy;
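
Single-capability structs now take the cast unconditionally: even on the inline (non-libcall) path the atomic instruction operates on a capability value rather than on the struct type, so the address is reinterpreted as a pointer to Int8CheriCapTy and the result is copied back through a struct-typed temporary, as the updated test output below shows.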

clang/test/CodeGen/cheri/c11-atomic-caps-struct.c

Lines changed: 75 additions & 27 deletions
@@ -4,6 +4,7 @@
 // RUN: | opt -S --passes=mem2reg | FileCheck --check-prefix=HYBRID %s
 // RUN: %riscv64_cheri_purecap_cc1 -target-feature +a -std=c11 -o - -emit-llvm -disable-O0-optnone %s -verify \
 // RUN: | opt -S --passes=mem2reg | FileCheck --check-prefix=PURECAP %s
+// expected-no-diagnostics
 
 typedef struct capstruct {
   unsigned __intcap value;
@@ -36,24 +37,25 @@ void test_init(_Atomic(capstruct) *f, capstruct value) {
 // HYBRID-NEXT: entry:
 // HYBRID-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_CAPSTRUCT:%.*]], align 16
 // HYBRID-NEXT: [[ATOMIC_TEMP:%.*]] = alloca [[STRUCT_CAPSTRUCT]], align 16
-// HYBRID-NEXT: call void @__atomic_load(i64 noundef 16, ptr noundef [[F]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 5)
+// HYBRID-NEXT: [[TMP0:%.*]] = load atomic ptr addrspace(200), ptr [[F]] seq_cst, align 16
+// HYBRID-NEXT: store ptr addrspace(200) [[TMP0]], ptr [[ATOMIC_TEMP]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[ATOMIC_TEMP]], i64 16, i1 false)
-// HYBRID-NEXT: [[TMP0:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr [[RETVAL]], align 16
-// HYBRID-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP0]]
+// HYBRID-NEXT: [[TMP1:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr [[RETVAL]], align 16
+// HYBRID-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP1]]
 //
 // PURECAP-LABEL: define {{[^@]+}}@test_load
 // PURECAP-SAME: (ptr addrspace(200) noundef [[F:%.*]]) addrspace(200) #[[ATTR0]] {
 // PURECAP-NEXT: entry:
 // PURECAP-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_CAPSTRUCT:%.*]], align 16, addrspace(200)
 // PURECAP-NEXT: [[ATOMIC_TEMP:%.*]] = alloca [[STRUCT_CAPSTRUCT]], align 16, addrspace(200)
-// PURECAP-NEXT: call void @__atomic_load(i64 noundef 16, ptr addrspace(200) noundef [[F]], ptr addrspace(200) noundef [[ATOMIC_TEMP]], i32 noundef signext 5)
+// PURECAP-NEXT: [[TMP0:%.*]] = load atomic ptr addrspace(200), ptr addrspace(200) [[F]] seq_cst, align 16
+// PURECAP-NEXT: store ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[ATOMIC_TEMP]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[RETVAL]], ptr addrspace(200) align 16 [[ATOMIC_TEMP]], i64 16, i1 false)
-// PURECAP-NEXT: [[TMP0:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[RETVAL]], align 16
-// PURECAP-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP0]]
+// PURECAP-NEXT: [[TMP1:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[RETVAL]], align 16
+// PURECAP-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP1]]
 //
 capstruct test_load(_Atomic(capstruct) *f) {
   return __c11_atomic_load(f, __ATOMIC_SEQ_CST);
-  // expected-warning@-1{{large atomic operation may incur significant performance penalty}}
 }
 
 // HYBRID-LABEL: define {{[^@]+}}@test_store
@@ -64,7 +66,8 @@ capstruct test_load(_Atomic(capstruct) *f) {
 // HYBRID-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr [[VALUE]], i32 0, i32 0
 // HYBRID-NEXT: store ptr addrspace(200) [[VALUE_COERCE]], ptr [[COERCE_DIVE]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DOTATOMICTMP]], ptr align 16 [[VALUE]], i64 16, i1 false)
-// HYBRID-NEXT: call void @__atomic_store(i64 noundef 16, ptr noundef [[F]], ptr noundef [[DOTATOMICTMP]], i32 noundef signext 5)
+// HYBRID-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr [[DOTATOMICTMP]], align 16
+// HYBRID-NEXT: store atomic ptr addrspace(200) [[TMP0]], ptr [[F]] seq_cst, align 16
 // HYBRID-NEXT: ret void
 //
 // PURECAP-LABEL: define {{[^@]+}}@test_store
@@ -75,12 +78,12 @@ capstruct test_load(_Atomic(capstruct) *f) {
 // PURECAP-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[VALUE]], i32 0, i32 0
 // PURECAP-NEXT: store ptr addrspace(200) [[VALUE_COERCE]], ptr addrspace(200) [[COERCE_DIVE]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DOTATOMICTMP]], ptr addrspace(200) align 16 [[VALUE]], i64 16, i1 false)
-// PURECAP-NEXT: call void @__atomic_store(i64 noundef 16, ptr addrspace(200) noundef [[F]], ptr addrspace(200) noundef [[DOTATOMICTMP]], i32 noundef signext 5)
+// PURECAP-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[DOTATOMICTMP]], align 16
+// PURECAP-NEXT: store atomic ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[F]] seq_cst, align 16
 // PURECAP-NEXT: ret void
 //
 void test_store(_Atomic(capstruct) *f, capstruct value) {
   __c11_atomic_store(f, value, __ATOMIC_SEQ_CST);
-  // expected-warning@-1{{large atomic operation may incur significant performance penalty}}
 }
 
 // HYBRID-LABEL: define {{[^@]+}}@test_xchg
@@ -93,10 +96,12 @@ void test_store(_Atomic(capstruct) *f, capstruct value) {
 // HYBRID-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr [[VALUE]], i32 0, i32 0
 // HYBRID-NEXT: store ptr addrspace(200) [[VALUE_COERCE]], ptr [[COERCE_DIVE]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DOTATOMICTMP]], ptr align 16 [[VALUE]], i64 16, i1 false)
-// HYBRID-NEXT: call void @__atomic_exchange(i64 noundef 16, ptr noundef [[F]], ptr noundef [[DOTATOMICTMP]], ptr noundef [[ATOMIC_TEMP]], i32 noundef signext 5)
+// HYBRID-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr [[DOTATOMICTMP]], align 16
+// HYBRID-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr [[F]], ptr addrspace(200) [[TMP0]] seq_cst, align 16
+// HYBRID-NEXT: store ptr addrspace(200) [[TMP1]], ptr [[ATOMIC_TEMP]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[RETVAL]], ptr align 16 [[ATOMIC_TEMP]], i64 16, i1 false)
-// HYBRID-NEXT: [[TMP0:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr [[RETVAL]], align 16
-// HYBRID-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP0]]
+// HYBRID-NEXT: [[TMP2:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr [[RETVAL]], align 16
+// HYBRID-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP2]]
 //
 // PURECAP-LABEL: define {{[^@]+}}@test_xchg
 // PURECAP-SAME: (ptr addrspace(200) noundef [[F:%.*]], ptr addrspace(200) [[VALUE_COERCE:%.*]]) addrspace(200) #[[ATTR0]] {
@@ -108,14 +113,15 @@ void test_store(_Atomic(capstruct) *f, capstruct value) {
 // PURECAP-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[VALUE]], i32 0, i32 0
 // PURECAP-NEXT: store ptr addrspace(200) [[VALUE_COERCE]], ptr addrspace(200) [[COERCE_DIVE]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DOTATOMICTMP]], ptr addrspace(200) align 16 [[VALUE]], i64 16, i1 false)
-// PURECAP-NEXT: call void @__atomic_exchange(i64 noundef 16, ptr addrspace(200) noundef [[F]], ptr addrspace(200) noundef [[DOTATOMICTMP]], ptr addrspace(200) noundef [[ATOMIC_TEMP]], i32 noundef signext 5)
+// PURECAP-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[DOTATOMICTMP]], align 16
+// PURECAP-NEXT: [[TMP1:%.*]] = atomicrmw xchg ptr addrspace(200) [[F]], ptr addrspace(200) [[TMP0]] seq_cst, align 16
+// PURECAP-NEXT: store ptr addrspace(200) [[TMP1]], ptr addrspace(200) [[ATOMIC_TEMP]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[RETVAL]], ptr addrspace(200) align 16 [[ATOMIC_TEMP]], i64 16, i1 false)
-// PURECAP-NEXT: [[TMP0:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[RETVAL]], align 16
-// PURECAP-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP0]]
+// PURECAP-NEXT: [[TMP2:%.*]] = load [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[RETVAL]], align 16
+// PURECAP-NEXT: ret [[STRUCT_CAPSTRUCT]] [[TMP2]]
 //
 capstruct test_xchg(_Atomic(capstruct) *f, capstruct value) {
   return __c11_atomic_exchange(f, value, __ATOMIC_SEQ_CST);
-  // expected-warning@-1{{large atomic operation may incur significant performance penalty}}
 }
 
 // HYBRID-LABEL: define {{[^@]+}}@test_cmpxchg_weak
@@ -126,8 +132,19 @@ capstruct test_xchg(_Atomic(capstruct) *f, capstruct value) {
 // HYBRID-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr [[NEW]], i32 0, i32 0
 // HYBRID-NEXT: store ptr addrspace(200) [[NEW_COERCE]], ptr [[COERCE_DIVE]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DOTATOMICTMP]], ptr align 16 [[NEW]], i64 16, i1 false)
-// HYBRID-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[F]], ptr noundef [[EXP]], ptr noundef [[DOTATOMICTMP]], i32 noundef signext 0, i32 noundef signext 0)
-// HYBRID-NEXT: ret i1 [[CALL]]
+// HYBRID-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr [[EXP]], align 16
+// HYBRID-NEXT: [[TMP1:%.*]] = load ptr addrspace(200), ptr [[DOTATOMICTMP]], align 16
+// HYBRID-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr [[F]], ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[TMP1]] monotonic monotonic, align 16
+// HYBRID-NEXT: [[TMP3:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 0
+// HYBRID-NEXT: [[TMP4:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 1
+// HYBRID-NEXT: br i1 [[TMP4]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// HYBRID: cmpxchg.store_expected:
+// HYBRID-NEXT: store ptr addrspace(200) [[TMP3]], ptr [[EXP]], align 16
+// HYBRID-NEXT: br label [[CMPXCHG_CONTINUE]]
+// HYBRID: cmpxchg.continue:
+// HYBRID-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP4]] to i8
+// HYBRID-NEXT: [[TOBOOL:%.*]] = trunc i8 [[FROMBOOL]] to i1
+// HYBRID-NEXT: ret i1 [[TOBOOL]]
 //
 // PURECAP-LABEL: define {{[^@]+}}@test_cmpxchg_weak
 // PURECAP-SAME: (ptr addrspace(200) noundef [[F:%.*]], ptr addrspace(200) noundef [[EXP:%.*]], ptr addrspace(200) [[NEW_COERCE:%.*]]) addrspace(200) #[[ATTR0]] {
@@ -137,12 +154,22 @@ capstruct test_xchg(_Atomic(capstruct) *f, capstruct value) {
 // PURECAP-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[NEW]], i32 0, i32 0
 // PURECAP-NEXT: store ptr addrspace(200) [[NEW_COERCE]], ptr addrspace(200) [[COERCE_DIVE]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DOTATOMICTMP]], ptr addrspace(200) align 16 [[NEW]], i64 16, i1 false)
-// PURECAP-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr addrspace(200) noundef [[F]], ptr addrspace(200) noundef [[EXP]], ptr addrspace(200) noundef [[DOTATOMICTMP]], i32 noundef signext 0, i32 noundef signext 0)
-// PURECAP-NEXT: ret i1 [[CALL]]
+// PURECAP-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[EXP]], align 16
+// PURECAP-NEXT: [[TMP1:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[DOTATOMICTMP]], align 16
+// PURECAP-NEXT: [[TMP2:%.*]] = cmpxchg weak ptr addrspace(200) [[F]], ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[TMP1]] monotonic monotonic, align 16
+// PURECAP-NEXT: [[TMP3:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 0
+// PURECAP-NEXT: [[TMP4:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 1
+// PURECAP-NEXT: br i1 [[TMP4]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// PURECAP: cmpxchg.store_expected:
+// PURECAP-NEXT: store ptr addrspace(200) [[TMP3]], ptr addrspace(200) [[EXP]], align 16
+// PURECAP-NEXT: br label [[CMPXCHG_CONTINUE]]
+// PURECAP: cmpxchg.continue:
+// PURECAP-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP4]] to i8
+// PURECAP-NEXT: [[TOBOOL:%.*]] = trunc i8 [[FROMBOOL]] to i1
+// PURECAP-NEXT: ret i1 [[TOBOOL]]
 //
 _Bool test_cmpxchg_weak(_Atomic(capstruct) *f, capstruct *exp, capstruct new) {
   return __c11_atomic_compare_exchange_weak(f, exp, new, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  // expected-warning@-1{{large atomic operation may incur significant performance penalty}}
 }
 
 // HYBRID-LABEL: define {{[^@]+}}@test_cmpxchg_strong
@@ -153,8 +180,19 @@ _Bool test_cmpxchg_weak(_Atomic(capstruct) *f, capstruct *exp, capstruct new) {
 // HYBRID-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr [[NEW]], i32 0, i32 0
 // HYBRID-NEXT: store ptr addrspace(200) [[NEW_COERCE]], ptr [[COERCE_DIVE]], align 16
 // HYBRID-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[DOTATOMICTMP]], ptr align 16 [[NEW]], i64 16, i1 false)
-// HYBRID-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr noundef [[F]], ptr noundef [[EXP]], ptr noundef [[DOTATOMICTMP]], i32 noundef signext 0, i32 noundef signext 0)
-// HYBRID-NEXT: ret i1 [[CALL]]
+// HYBRID-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr [[EXP]], align 16
+// HYBRID-NEXT: [[TMP1:%.*]] = load ptr addrspace(200), ptr [[DOTATOMICTMP]], align 16
+// HYBRID-NEXT: [[TMP2:%.*]] = cmpxchg ptr [[F]], ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[TMP1]] monotonic monotonic, align 16
+// HYBRID-NEXT: [[TMP3:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 0
+// HYBRID-NEXT: [[TMP4:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 1
+// HYBRID-NEXT: br i1 [[TMP4]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// HYBRID: cmpxchg.store_expected:
+// HYBRID-NEXT: store ptr addrspace(200) [[TMP3]], ptr [[EXP]], align 16
+// HYBRID-NEXT: br label [[CMPXCHG_CONTINUE]]
+// HYBRID: cmpxchg.continue:
+// HYBRID-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP4]] to i8
+// HYBRID-NEXT: [[TOBOOL:%.*]] = trunc i8 [[FROMBOOL]] to i1
+// HYBRID-NEXT: ret i1 [[TOBOOL]]
 //
 // PURECAP-LABEL: define {{[^@]+}}@test_cmpxchg_strong
 // PURECAP-SAME: (ptr addrspace(200) noundef [[F:%.*]], ptr addrspace(200) noundef [[EXP:%.*]], ptr addrspace(200) [[NEW_COERCE:%.*]]) addrspace(200) #[[ATTR0]] {
@@ -164,10 +202,20 @@ _Bool test_cmpxchg_weak(_Atomic(capstruct) *f, capstruct *exp, capstruct new) {
 // PURECAP-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_CAPSTRUCT]], ptr addrspace(200) [[NEW]], i32 0, i32 0
 // PURECAP-NEXT: store ptr addrspace(200) [[NEW_COERCE]], ptr addrspace(200) [[COERCE_DIVE]], align 16
 // PURECAP-NEXT: call void @llvm.memcpy.p200.p200.i64(ptr addrspace(200) align 16 [[DOTATOMICTMP]], ptr addrspace(200) align 16 [[NEW]], i64 16, i1 false)
-// PURECAP-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, ptr addrspace(200) noundef [[F]], ptr addrspace(200) noundef [[EXP]], ptr addrspace(200) noundef [[DOTATOMICTMP]], i32 noundef signext 0, i32 noundef signext 0)
-// PURECAP-NEXT: ret i1 [[CALL]]
+// PURECAP-NEXT: [[TMP0:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[EXP]], align 16
+// PURECAP-NEXT: [[TMP1:%.*]] = load ptr addrspace(200), ptr addrspace(200) [[DOTATOMICTMP]], align 16
+// PURECAP-NEXT: [[TMP2:%.*]] = cmpxchg ptr addrspace(200) [[F]], ptr addrspace(200) [[TMP0]], ptr addrspace(200) [[TMP1]] monotonic monotonic, align 16
+// PURECAP-NEXT: [[TMP3:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 0
+// PURECAP-NEXT: [[TMP4:%.*]] = extractvalue { ptr addrspace(200), i1 } [[TMP2]], 1
+// PURECAP-NEXT: br i1 [[TMP4]], label [[CMPXCHG_CONTINUE:%.*]], label [[CMPXCHG_STORE_EXPECTED:%.*]]
+// PURECAP: cmpxchg.store_expected:
+// PURECAP-NEXT: store ptr addrspace(200) [[TMP3]], ptr addrspace(200) [[EXP]], align 16
+// PURECAP-NEXT: br label [[CMPXCHG_CONTINUE]]
+// PURECAP: cmpxchg.continue:
+// PURECAP-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TMP4]] to i8
+// PURECAP-NEXT: [[TOBOOL:%.*]] = trunc i8 [[FROMBOOL]] to i1
+// PURECAP-NEXT: ret i1 [[TOBOOL]]
 //
 _Bool test_cmpxchg_strong(_Atomic(capstruct) *f, capstruct *exp, capstruct new) {
   return __c11_atomic_compare_exchange_strong(f, exp, new, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
-  // expected-warning@-1{{large atomic operation may incur significant performance penalty}}
 }
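
The CHECK lines above appear to be auto-generated (in the style of LLVM's update_cc_test_checks.py). The deleted expected-warning lines, together with the new // expected-no-diagnostics marker under -verify, confirm that none of these operations hits the "large atomic operation may incur significant performance penalty" diagnostic any more.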
