diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-17 15:12:44 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-17 15:12:44 +0000 |
commit | 7205f3c2e481b58856b3a3186e71aec87261fbd3 (patch) | |
tree | a8b9ff993b7f76d67b63d1f56c3457c859a486e3 /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | |
parent | ce161a781253952b710d4527562ee0de21b9c063 (diff) | |
download | bcm5719-llvm-7205f3c2e481b58856b3a3186e71aec87261fbd3.tar.gz bcm5719-llvm-7205f3c2e481b58856b3a3186e71aec87261fbd3.zip |
AMDGPU: SimplifyDemandedElts for image intrinsics
Causes some VGPR usage improvements in shaderdb, but
introduces some SGPR spilling regressions due to random
scheduling changes later.
llvm-svn: 300453
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 83 |
1 files changed, 80 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index abc7d07d451..3ce5a5c33c2 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1641,7 +1641,52 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, UndefElts.setHighBits(VWidth / 2); break; case Intrinsic::amdgcn_buffer_load: - case Intrinsic::amdgcn_buffer_load_format: { + case Intrinsic::amdgcn_buffer_load_format: + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + case Intrinsic::amdgcn_image_getlod: { if (VWidth == 1 || !DemandedElts.isMask()) return nullptr; @@ -1656,8 +1701,17 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, Type *NewTy = (NewNumElts == 1) ? EltTy : VectorType::get(EltTy, NewNumElts); - Function *NewIntrin = Intrinsic::getDeclaration(M, II->getIntrinsicID(), - NewTy); + auto IID = II->getIntrinsicID(); + + bool IsBuffer = IID == Intrinsic::amdgcn_buffer_load || + IID == Intrinsic::amdgcn_buffer_load_format; + + Function *NewIntrin = IsBuffer ? + Intrinsic::getDeclaration(M, IID, NewTy) : + // Samplers have 3 mangled types. + Intrinsic::getDeclaration(M, IID, + { NewTy, II->getArgOperand(0)->getType(), + II->getArgOperand(1)->getType()}); SmallVector<Value *, 5> Args; for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I) @@ -1669,6 +1723,29 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, CallInst *NewCall = Builder->CreateCall(NewIntrin, Args); NewCall->takeName(II); NewCall->copyMetadata(*II); + + if (!IsBuffer) { + ConstantInt *DMask = dyn_cast<ConstantInt>(NewCall->getArgOperand(3)); + if (DMask) { + unsigned DMaskVal = DMask->getZExtValue() & 0xf; + + unsigned PopCnt = 0; + unsigned NewDMask = 0; + for (unsigned I = 0; I < 4; ++I) { + const unsigned Bit = 1 << I; + if (!!(DMaskVal & Bit)) { + if (++PopCnt > NewNumElts) + break; + + NewDMask |= Bit; + } + } + + NewCall->setArgOperand(3, ConstantInt::get(DMask->getType(), NewDMask)); + } + } + + if (NewNumElts == 1) { return Builder->CreateInsertElement(UndefValue::get(V->getType()), NewCall, static_cast<uint64_t>(0)); |