summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:54:00 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2019-03-25 20:54:00 +0000
commit: ec28a1dcefc335c1c90959086194a8a168fff35a (patch)
tree: 5a0237d48504d1399467865e356e4f250e254862
parent: 51809cbc98ce187608bd5f2c46d18c5ffb7b6031 (diff)
download: bcm5719-llvm-ec28a1dcefc335c1c90959086194a8a168fff35a.tar.gz
download: bcm5719-llvm-ec28a1dcefc335c1c90959086194a8a168fff35a.zip
AMDGPU: Add support for cross address space synchronization scopes (clang)

Differential Revision: https://reviews.llvm.org/D59494
llvm-svn: 356947
-rw-r--r-- clang/lib/CodeGen/CGAtomic.cpp 7
-rw-r--r-- clang/lib/CodeGen/TargetInfo.cpp 33
-rw-r--r-- clang/lib/CodeGen/TargetInfo.h 6
-rw-r--r-- clang/test/CodeGenOpenCL/atomic-ops.cl 20
4 files changed, 43 insertions, 23 deletions
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 1d18e57b0e9..11618e18d36 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -679,7 +679,8 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
// Handle constant scope.
if (auto SC = dyn_cast<llvm::ConstantInt>(Scope)) {
auto SCID = CGF.getTargetHooks().getLLVMSyncScopeID(
- ScopeModel->map(SC->getZExtValue()), CGF.CGM.getLLVMContext());
+ CGF.CGM.getLangOpts(), ScopeModel->map(SC->getZExtValue()),
+ Order, CGF.CGM.getLLVMContext());
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
Order, SCID);
return;
@@ -708,7 +709,9 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *Expr, Address Dest,
Builder.SetInsertPoint(B);
EmitAtomicOp(CGF, Expr, Dest, Ptr, Val1, Val2, IsWeak, FailureOrder, Size,
Order,
- CGF.getTargetHooks().getLLVMSyncScopeID(ScopeModel->map(S),
+ CGF.getTargetHooks().getLLVMSyncScopeID(CGF.CGM.getLangOpts(),
+ ScopeModel->map(S),
+ Order,
CGF.getLLVMContext()));
Builder.CreateBr(ContBB);
}
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index d9bbf594ed6..5c2b3ff353e 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -462,8 +462,11 @@ TargetCodeGenInfo::performAddrSpaceCast(CodeGenModule &CGM, llvm::Constant *Src,
}
llvm::SyncScope::ID
-TargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S, llvm::LLVMContext &C) const {
- return C.getOrInsertSyncScopeID(""); /* default sync scope */
+TargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const {
+ return Ctx.getOrInsertSyncScopeID(""); /* default sync scope */
}
static bool isEmptyRecord(ASTContext &Context, QualType T, bool AllowArrays);
@@ -7824,8 +7827,10 @@ public:
}
LangAS getGlobalVarAddressSpace(CodeGenModule &CGM,
const VarDecl *D) const override;
- llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
- llvm::LLVMContext &C) const override;
+ llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const override;
llvm::Function *
createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
@@ -7971,10 +7976,12 @@ AMDGPUTargetCodeGenInfo::getGlobalVarAddressSpace(CodeGenModule &CGM,
}
llvm::SyncScope::ID
-AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
- llvm::LLVMContext &C) const {
- StringRef Name;
- switch (S) {
+AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const {
+ std::string Name;
+ switch (Scope) {
case SyncScope::OpenCLWorkGroup:
Name = "workgroup";
break;
@@ -7987,7 +7994,15 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(SyncScope S,
case SyncScope::OpenCLSubGroup:
Name = "wavefront";
}
- return C.getOrInsertSyncScopeID(Name);
+
+ if (Ordering != llvm::AtomicOrdering::SequentiallyConsistent) {
+ if (!Name.empty())
+ Name = Twine(Twine(Name) + Twine("-")).str();
+
+ Name = Twine(Twine(Name) + Twine("one-as")).str();
+ }
+
+ return Ctx.getOrInsertSyncScopeID(Name);
}
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 8a4154030ce..d7e9eee9c5b 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -268,8 +268,10 @@ public:
llvm::Type *DestTy) const;
/// Get the syncscope used in LLVM IR.
- virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
- llvm::LLVMContext &C) const;
+ virtual llvm::SyncScope::ID getLLVMSyncScopeID(const LangOptions &LangOpts,
+ SyncScope Scope,
+ llvm::AtomicOrdering Ordering,
+ llvm::LLVMContext &Ctx) const;
/// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
diff --git a/clang/test/CodeGenOpenCL/atomic-ops.cl b/clang/test/CodeGenOpenCL/atomic-ops.cl
index 4899b734e9c..88f2e0d0ea4 100644
--- a/clang/test/CodeGenOpenCL/atomic-ops.cl
+++ b/clang/test/CodeGenOpenCL/atomic-ops.cl
@@ -83,7 +83,7 @@ void fi3(atomic_int *i, atomic_uint *ui) {
bool fi4(atomic_int *i) {
// CHECK-LABEL: @fi4(
- // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup") acquire acquire
+ // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire
// CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
// CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
// CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
@@ -141,21 +141,21 @@ void fi6(atomic_int *i, int order, int scope) {
// CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
// CHECK-NEXT: ]
// CHECK: [[MON_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic
// CHECK: [[MON_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic
// CHECK: [[MON_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} monotonic
// CHECK: [[MON_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") monotonic
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic
// CHECK: [[ACQ_WG]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire
// CHECK: [[ACQ_DEV]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire
// CHECK: [[ACQ_ALL]]:
// CHECK: load atomic i32, i32* %{{.*}} acquire
// CHECK: [[ACQ_SUB]]:
- // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") acquire
+ // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire
// CHECK: [[SEQ_WG]]:
// CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst
// CHECK: [[SEQ_DEV]]:
@@ -169,13 +169,13 @@ void fi6(atomic_int *i, int order, int scope) {
float ff1(global atomic_float *d) {
// CHECK-LABEL: @ff1
- // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup") monotonic
+ // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}
void ff2(atomic_float *d) {
// CHECK-LABEL: @ff2
- // CHECK: store atomic i32 {{.*}} syncscope("workgroup") release
+ // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release
__opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}
@@ -198,7 +198,7 @@ void atomic_init_foo()
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
- // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") acquire monotonic
+ // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic
__opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);
// CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire
OpenPOWER on IntegriCloud