author    Alexander Timofeev <Alexander.Timofeev@amd.com>    2017-06-02 15:25:52 +0000
committer Alexander Timofeev <Alexander.Timofeev@amd.com>    2017-06-02 15:25:52 +0000
commit    3f70b619a9830925f7edbfe51f07a7b45d8a44b9 (patch)
tree      655ee3b480e7fb0a2b7da7ce1136b0703dc41cac
parent    bbf4f7091f919f93fec6be59260df843e4c02613 (diff)
AMDGPUAnnotateUniformValue should always treat volatile loads as divergent
llvm-svn: 304554
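
Rationale: with -amdgpu-scalarize-global-loads, a uniform global load can be selected to a scalar (SMRD, s_load_*) instruction. A volatile load must not take that path, so the selection code now treats it as divergent and keeps it on the vector/flat memory path. A minimal IR sketch contrasting the two cases (illustrative only, not part of the commit; function names are made up):

; Uniform and non-volatile: eligible for the scalar path (s_load_dword).
define amdgpu_kernel void @uniform_case(i32 addrspace(1)* %p, i32 addrspace(1)* %q) {
  %v = load i32, i32 addrspace(1)* %p, align 4
  store i32 %v, i32 addrspace(1)* %q, align 4
  ret void
}

; Volatile: after this patch it stays divergent and is selected to a
; vector access (flat_load_dword on VI), even though %p itself is uniform.
define amdgpu_kernel void @volatile_case(i32 addrspace(1)* %p, i32 addrspace(1)* %q) {
  %v = load volatile i32, i32 addrspace(1)* %p, align 4
  store i32 %v, i32 addrspace(1)* %q, align 4
  ret void
}

The new test added below exercises exactly the volatile case.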
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp                 2
-rw-r--r--  llvm/lib/Target/AMDGPU/SMInstructions.td                  1
-rw-r--r--  llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll  15
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 76c2644867a..b48b2391110 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   }
   if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
     if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
-        isMemOpHasNoClobberedMemOperand(Load))
+        !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
       return SDValue();
     // Non-uniform loads will be selected to MUBUF instructions, so they
     // have the same legalization requirements as global and private
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 5b840a14dbc..73dd8b7daa4 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
     ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
      static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
     (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
+     !Ld->isVolatile() &&
      static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
      static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
 }]>;
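
In words, the post-patch smrd_load predicate admits a load for scalar (SMRD) selection only if it is a uniform constant-address-space load, or a global-address-space load (under -amdgpu-scalarize-global-loads) that is uniform, has no clobbered memory operand, and — the new condition — is not volatile. A C++-style restatement for readability (a paraphrase, not the literal source; TLI abbreviates the casted target lowering):

// Sketch of the post-patch condition; TLI stands for
// static_cast<const SITargetLowering *>(getTargetLowering()).
bool SelectAsScalarLoad =
    (Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
     TLI->isMemOpUniform(N)) ||
    (Subtarget->getScalarizeGlobalBehavior() &&
     Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
     !Ld->isVolatile() &&                       // new in this patch
     TLI->isMemOpUniform(N) &&
     TLI->isMemOpHasNoClobberedMemOperand(N));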
diff --git a/llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll b/llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll
new file mode 100644
index 00000000000..bced3c408c5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: @volatile_load
+; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
+; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+
+define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) {
+bb:
+ %tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4
+ %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5
+ store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4
+ ret void
+}
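
To reproduce locally, the RUN line above amounts to the following invocation (using the new test file's path):

llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads \
    < llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll \
  | FileCheck -check-prefix=GCN llvm/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll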