author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-25 20:02:54 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-07-25 20:02:54 +0000 |
commit | 7cddfed7e864185e6a93c7f45ac874cd239d2b7a (patch) | |
tree | 2c22235b828dca51cf025335b36dc20b0c48203c /llvm/lib/Transforms/Scalar/Scalarizer.cpp | |
parent | 2fa171c43a4e10442e0e9bc972ceeb5cfd1b9589 (diff) | |
download | bcm5719-llvm-7cddfed7e864185e6a93c7f45ac874cd239d2b7a.tar.gz bcm5719-llvm-7cddfed7e864185e6a93c7f45ac874cd239d2b7a.zip |
Scalarizer: Support scalarizing intrinsics
llvm-svn: 276681
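For illustration only (this sketch is not part of the commit; the function `@f` and the exact value names are made up, and the output shown is approximate): with this change the pass can split a call to a trivially vectorizable intrinsic such as `llvm.sqrt` into one scalar call per vector element and gather the results back into a vector.

```llvm
; Input: a sqrt intrinsic call on a 2-element vector.
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)

define <2 x float> @f(<2 x float> %x) {
  %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

; Approximate shape of the result after running the scalarizer pass:
; each element is extracted, passed to the scalar form of the
; intrinsic, and the per-element results are reassembled.
;   %x.i0 = extractelement <2 x float> %x, i32 0
;   %r.i0 = call float @llvm.sqrt.f32(float %x.i0)
;   %x.i1 = extractelement <2 x float> %x, i32 1
;   %r.i1 = call float @llvm.sqrt.f32(float %x.i1)
;   %r.upto0 = insertelement <2 x float> undef, float %r.i0, i32 0
;   %r = insertelement <2 x float> %r.upto0, float %r.i1, i32 1
;   ret <2 x float> %r
```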
Diffstat (limited to 'llvm/lib/Transforms/Scalar/Scalarizer.cpp')
-rw-r--r-- | llvm/lib/Transforms/Scalar/Scalarizer.cpp | 79 |
1 files changed, 79 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aed4a4ad4d2..39969e27367 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -16,6 +16,7 @@
 
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstVisitor.h"
 #include "llvm/Pass.h"
@@ -148,6 +149,7 @@ public:
   bool visitPHINode(PHINode &);
   bool visitLoadInst(LoadInst &);
   bool visitStoreInst(StoreInst &);
+  bool visitCallInst(CallInst &I);
 
   static void registerOptions() {
     // This is disabled by default because having separate loads and stores
@@ -169,6 +171,8 @@ private:
   template<typename T>
   bool splitBinary(Instruction &, const T &);
 
+  bool splitCall(CallInst &CI);
+
   ScatterMap Scattered;
   GatherList Gathered;
   unsigned ParallelLoopAccessMDKind;
@@ -394,6 +398,77 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
   return true;
 }
 
+static bool isTriviallyScalariable(Intrinsic::ID ID) {
+  return isTriviallyVectorizable(ID);
+}
+
+// All of the current scalarizable intrinsics only have one mangled type.
+static Function *getScalarIntrinsicDeclaration(Module *M,
+                                               Intrinsic::ID ID,
+                                               VectorType *Ty) {
+  return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
+}
+
+/// If a call to a vector typed intrinsic function, split into a scalar call per
+/// element if possible for the intrinsic.
+bool Scalarizer::splitCall(CallInst &CI) {
+  VectorType *VT = dyn_cast<VectorType>(CI.getType());
+  if (!VT)
+    return false;
+
+  Function *F = CI.getCalledFunction();
+  if (!F)
+    return false;
+
+  Intrinsic::ID ID = F->getIntrinsicID();
+  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
+    return false;
+
+  unsigned NumElems = VT->getNumElements();
+  unsigned NumArgs = CI.getNumArgOperands();
+
+  ValueVector ScalarOperands(NumArgs);
+  SmallVector<Scatterer, 8> Scattered(NumArgs);
+
+  Scattered.resize(NumArgs);
+
+  // Assumes that any vector type has the same number of elements as the return
+  // vector type, which is true for all current intrinsics.
+  for (unsigned I = 0; I != NumArgs; ++I) {
+    Value *OpI = CI.getOperand(I);
+    if (OpI->getType()->isVectorTy()) {
+      Scattered[I] = scatter(&CI, OpI);
+      assert(Scattered[I].size() == NumElems && "mismatched call operands");
+    } else {
+      ScalarOperands[I] = OpI;
+    }
+  }
+
+  ValueVector Res(NumElems);
+  ValueVector ScalarCallOps(NumArgs);
+
+  Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
+  IRBuilder<> Builder(&CI);
+
+  // Perform actual scalarization, taking care to preserve any scalar operands.
+  for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
+    ScalarCallOps.clear();
+
+    for (unsigned J = 0; J != NumArgs; ++J) {
+      if (hasVectorInstrinsicScalarOpd(ID, J))
+        ScalarCallOps.push_back(ScalarOperands[J]);
+      else
+        ScalarCallOps.push_back(Scattered[J][Elem]);
+    }
+
+    Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
+                                   CI.getName() + ".i" + Twine(Elem));
+  }
+
+  gather(&CI, Res);
+  return true;
+}
+
 bool Scalarizer::visitSelectInst(SelectInst &SI) {
   VectorType *VT = dyn_cast<VectorType>(SI.getType());
   if (!VT)
@@ -642,6 +717,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
   return true;
 }
 
+bool Scalarizer::visitCallInst(CallInst &CI) {
+  return splitCall(CI);
+}
+
 // Delete the instructions that we scalarized. If a full vector result
 // is still needed, recreate it using InsertElements.
 bool Scalarizer::finish() {
```
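As a usage note, a hypothetical regression test (sketched here under the assumption that the pass is invoked as `-scalarizer`; this is not the test that accompanies the commit, and the RUN/CHECK lines are only illustrative) could cover the scalar-operand path guarded by `hasVectorInstrinsicScalarOpd`: the i32 exponent of `llvm.powi` must stay scalar while the vector operand is split per element.

```llvm
; RUN: opt %s -scalarizer -S | FileCheck %s

declare <2 x float> @llvm.powi.v2f32(<2 x float>, i32)

; The vector operand %x is split into two scalar calls; the scalar
; exponent %n is forwarded unchanged to both of them.
; CHECK-LABEL: @scalarize_powi(
; CHECK: call float @llvm.powi.f32(
; CHECK: call float @llvm.powi.f32(
define <2 x float> @scalarize_powi(<2 x float> %x, i32 %n) {
  %r = call <2 x float> @llvm.powi.v2f32(<2 x float> %x, i32 %n)
  ret <2 x float> %r
}
```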