summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r-- llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp 77
1 files changed, 60 insertions, 17 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 359fc55035f..4e509816716 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -984,13 +984,65 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
if (VWidth == 1)
return nullptr;
- ConstantInt *NewDMask = nullptr;
+ IRBuilderBase::InsertPointGuard Guard(Builder);
+ Builder.SetInsertPoint(II);
+
+ // Assume the arguments are unchanged and later override them, if needed.
+ SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
if (DMaskIdx < 0) {
- // Pretend that a prefix of elements is demanded to simplify the code
- // below.
- DemandedElts = (1 << DemandedElts.getActiveBits()) - 1;
+ // Buffer case.
+
+ const unsigned ActiveBits = DemandedElts.getActiveBits();
+ const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
+
+ // Start assuming the prefix of elements is demanded, but possibly clear some other bits if
+ // there are trailing zeros (unused components at front) and update offset.
+ DemandedElts = (1 << ActiveBits) - 1;
+
+ if (UnusedComponentsAtFront > 0) {
+ static const unsigned InvalidOffsetIdx = 0xf;
+
+ unsigned OffsetIdx;
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_s_buffer_load:
+ // If resulting type is vec3, there is no point in trimming the
+ // load with updated offset, as the vec3 would most likely be widened to
+ // vec4 anyway during lowering.
+ if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
+ OffsetIdx = InvalidOffsetIdx;
+ else
+ OffsetIdx = 1;
+ break;
+ case Intrinsic::amdgcn_buffer_load:
+ case Intrinsic::amdgcn_buffer_load_format:
+ case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
+ OffsetIdx = 2;
+ break;
+ default:
+ // TODO: handle *tbuffer* intrinsics.
+ OffsetIdx = InvalidOffsetIdx;
+ break;
+ }
+
+ if (OffsetIdx != InvalidOffsetIdx) {
+ // Clear demanded bits and update the offset.
+ DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
+ auto Offset = II->getArgOperand(OffsetIdx);
+ unsigned SingleComponentSizeInBits = getDataLayout().getTypeSizeInBits(II->getType()->getScalarType());
+ unsigned OffsetAdd = UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
+ auto OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
+ Args[OffsetIdx] = Builder.CreateAdd(Offset, OffsetAddVal);
+ }
+ }
} else {
+ // Image case.
+
ConstantInt *DMask = cast<ConstantInt>(II->getArgOperand(DMaskIdx));
unsigned DMaskVal = DMask->getZExtValue() & 0xf;
@@ -1009,7 +1061,7 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
}
if (DMaskVal != NewDMaskVal)
- NewDMask = ConstantInt::get(DMask->getType(), NewDMaskVal);
+ Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
unsigned NewNumElts = DemandedElts.countPopulation();
@@ -1017,8 +1069,8 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
return UndefValue::get(II->getType());
if (NewNumElts >= VWidth && DemandedElts.isMask()) {
- if (NewDMask)
- II->setArgOperand(DMaskIdx, NewDMask);
+ if (DMaskIdx >= 0)
+ II->setArgOperand(DMaskIdx, Args[DMaskIdx]);
return nullptr;
}
@@ -1041,16 +1093,6 @@ Value *InstCombiner::simplifyAMDGCNMemoryIntrinsicDemanded(IntrinsicInst *II,
OverloadTys[0] = NewTy;
Function *NewIntrin = Intrinsic::getDeclaration(M, IID, OverloadTys);
- SmallVector<Value *, 16> Args;
- for (unsigned I = 0, E = II->getNumArgOperands(); I != E; ++I)
- Args.push_back(II->getArgOperand(I));
-
- if (NewDMask)
- Args[DMaskIdx] = NewDMask;
-
- IRBuilderBase::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(II);
-
CallInst *NewCall = Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(II);
NewCall->copyMetadata(*II);
@@ -1719,6 +1761,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
OpenPOWER on IntegriCloud