diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-05-22 08:04:33 +0000 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2018-05-22 08:04:33 +0000 |
| commit | 0e132dca53ebd7e43145ba9c31d714630ff9f78d (patch) | |
| tree | 56f2b0a3bd0161f72660e85679b51ec6b638bc78 | |
| parent | 7c6cd52698ef806ce92b9b85ee28c021a9a4f018 (diff) | |
| download | bcm5719-llvm-0e132dca53ebd7e43145ba9c31d714630ff9f78d.tar.gz bcm5719-llvm-0e132dca53ebd7e43145ba9c31d714630ff9f78d.zip | |
[AMDGPU] Optimize old value of v_mov_b32_dpp
We can eliminate the old value if bound_ctrl = 1 and row_mask = bank_mask = 0xf.
This is an alternative implementation that works on the intrinsic in InstCombine.
Original review for the post-ISel optimization: D46570.
Differential Revision: https://reviews.llvm.org/D46596
llvm-svn: 332956
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 17 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll | 29 |
2 files changed, 46 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2516179e8b4..8b7c28281ca 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3407,6 +3407,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { // amdgcn.kill(i1 1) is a no-op return eraseInstFromFunction(CI); } + case Intrinsic::amdgcn_update_dpp: { + Value *Old = II->getArgOperand(0); + + auto BC = dyn_cast<ConstantInt>(II->getArgOperand(5)); + auto RM = dyn_cast<ConstantInt>(II->getArgOperand(3)); + auto BM = dyn_cast<ConstantInt>(II->getArgOperand(4)); + if (!BC || !RM || !BM || + BC->isZeroValue() || + RM->getZExtValue() != 0xF || + BM->getZExtValue() != 0xF || + isa<UndefValue>(Old)) + break; + + // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value. + II->setOperand(0, UndefValue::get(Old->getType())); + return II; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll index c8a05204bf5..e66e054c780 100644 --- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll @@ -1692,5 +1692,34 @@ define void @kill_true() { ret void } +; -------------------------------------------------------------------- +; llvm.amdgcn.update.dpp.i32 +; -------------------------------------------------------------------- + +declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) + +; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_no_combine( +; CHECK: @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) +define amdgpu_kernel void @update_dpp_no_combine(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { + %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 0) + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_drop_old( +; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in2, i32 3, i32 15, i32 15, i1 true) +define amdgpu_kernel void @update_dpp_drop_old(i32 addrspace(1)* %out, i32 %in1, i32 %in2) { + %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 3, i32 15, i32 15, i1 1) + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} + +; CHECK-LABEL: {{^}}define amdgpu_kernel void @update_dpp_undef_old( +; CHECK: @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 true) +define amdgpu_kernel void @update_dpp_undef_old(i32 addrspace(1)* %out, i32 %in1) { + %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 undef, i32 %in1, i32 4, i32 15, i32 15, i1 1) + store i32 %tmp0, i32 addrspace(1)* %out + ret void +} ; CHECK: attributes #5 = { convergent } |

