diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-14 14:51:26 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-06-14 14:51:26 +0000 |
commit | 492d71cc99d613b2ebd3fb9776f57a54e5c5682a (patch) | |
tree | 7092ddb203649c93e5795ae096b61f127b7bf92e | |
parent | 0feebef501fc9988493203dee0d86bb7792e587f (diff) | |
download | bcm5719-llvm-492d71cc99d613b2ebd3fb9776f57a54e5c5682a.tar.gz bcm5719-llvm-492d71cc99d613b2ebd3fb9776f57a54e5c5682a.zip |
AMDGPU: Fold readlane intrinsics of constants
I'm not 100% sure about this: I'm worried that later IR transforms
might end up introducing divergence downstream once the intrinsic call
has been replaced with a constant, but I haven't come up with an
example yet.
llvm-svn: 363406
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 7 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll | 56 |
2 files changed, 63 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f167762b602..347aeb6989a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3776,6 +3776,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       II->setOperand(0, UndefValue::get(Old->getType()));
       return II;
     }
+  case Intrinsic::amdgcn_readfirstlane:
+  case Intrinsic::amdgcn_readlane: {
+    // A constant value is trivially uniform.
+    if (Constant *C = dyn_cast<Constant>(II->getArgOperand(0)))
+      return replaceInstUsesWith(*II, C);
+    break;
+  }
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore. This can
     // happen when variable allocas are DCE'd.
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 6c5c02569e5..1d54d78c57d 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -2432,6 +2432,62 @@ define void @kill_true() {
 }
 
 ; --------------------------------------------------------------------
+; llvm.amdgcn.readfirstlane
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+@gv = constant i32 0
+
+define amdgpu_kernel void @readfirstlane_constant(i32 %arg) {
+; CHECK-LABEL: @readfirstlane_constant(
+; CHECK-NEXT: %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+; CHECK-NEXT: store volatile i32 %var, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
+; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
+  %var = call i32 @llvm.amdgcn.readfirstlane(i32 %arg)
+  %zero = call i32 @llvm.amdgcn.readfirstlane(i32 0)
+  %imm = call i32 @llvm.amdgcn.readfirstlane(i32 123)
+  %constexpr = call i32 @llvm.amdgcn.readfirstlane(i32 ptrtoint (i32* @gv to i32))
+  %undef = call i32 @llvm.amdgcn.readfirstlane(i32 undef)
+  store volatile i32 %var, i32* undef
+  store volatile i32 %zero, i32* undef
+  store volatile i32 %imm, i32* undef
+  store volatile i32 %constexpr, i32* undef
+  store volatile i32 %undef, i32* undef
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.readlane
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.readlane(i32, i32)
+
+define amdgpu_kernel void @readlane_constant(i32 %arg, i32 %lane) {
+; CHECK-LABEL: @readlane_constant(
+; CHECK-NEXT: %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
+; CHECK-NEXT: store volatile i32 %var, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 0, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 123, i32* undef, align 4
+; CHECK-NEXT: store volatile i32 ptrtoint (i32* @gv to i32), i32* undef, align 4
+; CHECK-NEXT: store volatile i32 undef, i32* undef, align 4
+  %var = call i32 @llvm.amdgcn.readlane(i32 %arg, i32 7)
+  %zero = call i32 @llvm.amdgcn.readlane(i32 0, i32 %lane)
+  %imm = call i32 @llvm.amdgcn.readlane(i32 123, i32 %lane)
+  %constexpr = call i32 @llvm.amdgcn.readlane(i32 ptrtoint (i32* @gv to i32), i32 %lane)
+  %undef = call i32 @llvm.amdgcn.readlane(i32 undef, i32 %lane)
+  store volatile i32 %var, i32* undef
+  store volatile i32 %zero, i32* undef
+  store volatile i32 %imm, i32* undef
+  store volatile i32 %constexpr, i32* undef
+  store volatile i32 %undef, i32* undef
+  ret void
+}
+
+; --------------------------------------------------------------------
 ; llvm.amdgcn.update.dpp.i32
 ; --------------------------------------------------------------------
 