author    David Majnemer <david.majnemer@gmail.com>  2016-07-14 00:29:50 +0000
committer David Majnemer <david.majnemer@gmail.com>  2016-07-14 00:29:50 +0000
commit    7f781aba9755633c53210db02d51f6f56d11ea48 (patch)
tree      939807d2484ee567af45c3c88bf2e4724b205d14 /llvm
parent    63497c649245cc384c2986f63f713bef490d8da2 (diff)
[ConstantFolding] Fold masked loads
We can constant fold a masked load when its operands are suitably
constant: the source pointer refers to constant memory, and the mask
and passthru operands are constant vectors.
Differential Revision: http://reviews.llvm.org/D22324
llvm-svn: 275352
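
Conceptually, the fold works lane by lane: where the constant mask is true,
the result takes the lane loaded from constant memory; where it is false, it
takes the lane from the passthru vector; and if any consulted lane cannot be
resolved to a constant, the whole fold is abandoned. Below is a minimal
standalone sketch of that selection rule, in plain C++ with arrays standing in
for LLVM constant vectors; the names and the std::optional failure signalling
are illustrative, not LLVM API.

#include <array>
#include <cstddef>
#include <cstdio>
#include <optional>

// Per-lane model of the masked-load fold: lane I is Loaded[I] when the mask
// bit is set, Passthru[I] when it is clear. The real code must additionally
// prove every consulted lane is itself a known constant, or give up entirely.
template <typename T, std::size_t N>
std::optional<std::array<T, N>>
foldMaskedLoad(const std::array<bool, N> &Mask, const std::array<T, N> &Loaded,
               const std::array<T, N> &Passthru) {
  std::array<T, N> Result{};
  for (std::size_t I = 0; I != N; ++I)
    Result[I] = Mask[I] ? Loaded[I] : Passthru[I];
  return Result;
}

int main() {
  // Lanes with a clear mask bit fall back to the passthru value; the
  // remaining lanes take the constants loaded from memory.
  std::array<bool, 4> Mask{false, true, true, false};
  std::array<int, 4> Loaded{42, 43, 44, 45};
  std::array<int, 4> Passthru{-1, -1, -1, -1};
  if (auto R = foldMaskedLoad(Mask, Loaded, Passthru))
    for (int V : *R)
      std::printf("%d ", V); // prints: -1 43 44 -1
  return 0;
}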
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Analysis/ConstantFolding.cpp      | 37 +++++++++++++++++-
-rw-r--r--  llvm/test/Transforms/InstSimplify/call.ll  | 11 ++++++
2 files changed, 47 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index dcfe700bf94..96a2d02ed5b 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1299,6 +1299,7 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
   case Intrinsic::fmuladd:
   case Intrinsic::copysign:
   case Intrinsic::round:
+  case Intrinsic::masked_load:
   case Intrinsic::sadd_with_overflow:
   case Intrinsic::uadd_with_overflow:
   case Intrinsic::ssub_with_overflow:
@@ -1843,11 +1844,44 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
 
 Constant *ConstantFoldVectorCall(StringRef Name, unsigned IntrinsicID,
                                  VectorType *VTy, ArrayRef<Constant *> Operands,
+                                 const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
   SmallVector<Constant *, 4> Result(VTy->getNumElements());
   SmallVector<Constant *, 4> Lane(Operands.size());
   Type *Ty = VTy->getElementType();
 
+  if (IntrinsicID == Intrinsic::masked_load) {
+    auto *SrcPtr = Operands[0];
+    auto *Mask = Operands[2];
+    auto *Passthru = Operands[3];
+    Constant *VecData = ConstantFoldLoadFromConstPtr(SrcPtr, VTy, DL);
+    if (!VecData)
+      return nullptr;
+
+    SmallVector<Constant *, 32> NewElements;
+    for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
+      auto *MaskElt =
+          dyn_cast_or_null<ConstantInt>(Mask->getAggregateElement(I));
+      if (!MaskElt)
+        break;
+      if (MaskElt->isZero()) {
+        auto *PassthruElt = Passthru->getAggregateElement(I);
+        if (!PassthruElt)
+          break;
+        NewElements.push_back(PassthruElt);
+      } else {
+        assert(MaskElt->isOne());
+        auto *VecElt = VecData->getAggregateElement(I);
+        if (!VecElt)
+          break;
+        NewElements.push_back(VecElt);
+      }
+    }
+    if (NewElements.size() == VTy->getNumElements())
+      return ConstantVector::get(NewElements);
+    return nullptr;
+  }
+
   for (unsigned I = 0, E = VTy->getNumElements(); I != E; ++I) {
     // Gather a column of constants.
     for (unsigned J = 0, JE = Operands.size(); J != JE; ++J) {
@@ -1880,7 +1914,8 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
   Type *Ty = F->getReturnType();
 
   if (auto *VTy = dyn_cast<VectorType>(Ty))
-    return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands, TLI);
+    return ConstantFoldVectorCall(Name, F->getIntrinsicID(), VTy, Operands,
+                                  F->getParent()->getDataLayout(), TLI);
 
   return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
 }
diff --git a/llvm/test/Transforms/InstSimplify/call.ll b/llvm/test/Transforms/InstSimplify/call.ll
index 244ce81d5ba..e0a071a3bb1 100644
--- a/llvm/test/Transforms/InstSimplify/call.ll
+++ b/llvm/test/Transforms/InstSimplify/call.ll
@@ -204,4 +204,15 @@ entry:
 ; CHECK-LABEL: define i32 @call_undef(
 ; CHECK: ret i32 undef
 
+@GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
+
+define <8 x i32> @partial_masked_load() {
+; CHECK-LABEL: @partial_masked_load(
+; CHECK: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+  %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  ret <8 x i32> %masked.load
+}
+
 declare noalias i8* @malloc(i64)
+
+declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
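
A note on the new test, in case the expected vector looks surprising: the
getelementptr index of -2 makes the <8 x i32> load window start two elements
before @GV, so lanes 0 and 1 would read out of bounds. Exactly those lanes are
disabled in the mask, so they take the undef passthru, while lanes 2 through 7
map onto @GV[0..5], i.e. 42 through 47. A quick sanity check of that lane
arithmetic (a hypothetical standalone C++ snippet, not part of the patch):

#include <cstdio>

int main() {
  const int GV[8] = {42, 43, 44, 45, 46, 47, 48, 49};
  const bool Mask[8] = {false, false, true, true, true, true, true, true};
  // Lane I of the load window corresponds to GV[I - 2]; lanes 0 and 1 are
  // out of bounds, but the mask guarantees they are never read.
  for (int I = 0; I != 8; ++I)
    Mask[I] ? std::printf("%d ", GV[I - 2]) : std::printf("undef ");
  // prints: undef undef 42 43 44 45 46 47
  return 0;
}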