summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-14 14:51:26 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-14 14:51:26 +0000
commit: 492d71cc99d613b2ebd3fb9776f57a54e5c5682a (patch)
tree: 7092ddb203649c93e5795ae096b61f127b7bf92e
parent: 0feebef501fc9988493203dee0d86bb7792e587f (diff)
download: bcm5719-llvm-492d71cc99d613b2ebd3fb9776f57a54e5c5682a.tar.gz
          bcm5719-llvm-492d71cc99d613b2ebd3fb9776f57a54e5c5682a.zip
AMDGPU: Fold readlane intrinsics of constants
I'm not 100% sure about this, since I'm worried about IR transforms that might end up introducing divergence downstream once replaced with a constant, but I haven't come up with an example yet.

llvm-svn: 363406
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 7
-rw-r--r-- llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll 56
2 files changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f167762b602..347aeb6989a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3776,6 +3776,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->setOperand(0, UndefValue::get(Old->getType()));
return II;
}
+ case Intrinsic::amdgcn_readfirstlane:
+ case Intrinsic::amdgcn_readlane: {
+ // A constant value is trivially uniform.
+ if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
+ return replaceInstUsesWith(*II, C);
+ break;
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 6c5c02569e5..1d54d78c57d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2432,6 +2432,62 @@ define void @kill_true() {
}
; --------------------------------------------------------------------
+; llvm.amdgcn.readfirstlane
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+@gv = constant i32 0
+
+define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_constant(
+; CHECK-NEXT: %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+; CHECK-NEXT: store volatile i32 %var, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
+; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
+ %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+ %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
+ %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
+ %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
+ %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
+ store volatile i32 %var, i32* undef
+ store volatile i32 %zero, i32* undef
+ store volatile i32 %imm, i32* undef
+ store volatile i32 %constexpr, i32* undef
+ store volatile i32 %undef, i32* undef
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.readlane
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.readlane(i32, i32)
+
+define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_constant(
+; CHECK-NEXT: %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
+; CHECK-NEXT: store volatile i32 %var, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
+; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
+ %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
+ %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
+ %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
+ %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
+ %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
+ store volatile i32 %var, i32* undef
+ store volatile i32 %zero, i32* undef
+ store volatile i32 %imm, i32* undef
+ store volatile i32 %constexpr, i32* undef
+ store volatile i32 %undef, i32* undef
+ ret void
+}
+
+; --------------------------------------------------------------------
; llvm.amdgcn.update.dpp.i32
; --------------------------------------------------------------------
OpenPOWER on IntegriCloud