diff options
Diffstat (limited to 'llvm/lib/Analysis/TargetTransformInfo.cpp')
-rw-r--r-- | llvm/lib/Analysis/TargetTransformInfo.cpp | 557 |
1 files changed, 557 insertions, 0 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index 8673b1b55d9..0884eea329a 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -16,11 +16,13 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include <utility> using namespace llvm; +using namespace PatternMatch; #define DEBUG_TYPE "tti" @@ -29,6 +31,10 @@ static cl::opt<bool> UseWideMemcpyLoopLowering( cl::desc("Enables the new wide memcpy loop lowering in Transforms/Utils."), cl::Hidden); +static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); + namespace { /// \brief No-op implementation of the TTI interface using the utility base /// classes. @@ -583,6 +589,557 @@ bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const { return TTIImpl->shouldExpandReduction(II); } +int TargetTransformInfo::getInstructionLatency(const Instruction *I) const { + return TTIImpl->getInstructionLatency(I); +} + +static bool isReverseVectorMask(ArrayRef<int> Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { + bool Vec0 = false; + bool Vec1 = false; + for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { + if (Mask[i] >= 0) { + if ((unsigned)Mask[i] >= NumVecElts) + Vec1 = true; + else + Vec0 = true; + } + } + return !(Vec0 && Vec1); +} + +static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { + for (unsigned i = 0; i < Mask.size(); ++i) + if (Mask[i] > 0) + return false; + return true; +} + +static bool isAlternateVectorMask(ArrayRef<int> Mask) { + bool isAlternate = true; + unsigned MaskSize = Mask.size(); + + // Example: shufflevector A, B, <0,5,2,7> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); + } + + if (isAlternate) + return true; + + isAlternate = true; + // Example: shufflevector A, B, <4,1,6,3> + for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { + if (Mask[i] < 0) + continue; + isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); + } + + return isAlternate; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant or for a non uniform vector of constants. + if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { + OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; + if (cast<Constant>(V)->getSplatValue() != nullptr) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + } + + // Check for a splat of a uniform value. This is not loop aware, so return + // true only for the obviously uniform cases (argument, globalvalue) + const Value *Splat = getSplatValue(V); + if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) + OpInfo = TargetTransformInfo::OK_UniformValue; + + return OpInfo; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector<int, 16> ActualMask = SI->getShuffleMask(); + return Mask == ActualMask; +} + +namespace { +/// Kind of the reduction data. +enum ReductionKind { + RK_None, /// Not a reduction. + RK_Arithmetic, /// Binary reduction data. + RK_MinMax, /// Min/max reduction data. + RK_UnsignedMinMax, /// Unsigned min/max reduction data. +}; +/// Contains opcode + LHS/RHS parts of the reduction operations. +struct ReductionData { + ReductionData() = delete; + ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS) + : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) { + assert(Kind != RK_None && "expected binary or min/max reduction only."); + } + unsigned Opcode = 0; + Value *LHS = nullptr; + Value *RHS = nullptr; + ReductionKind Kind = RK_None; + bool hasSameData(ReductionData &RD) const { + return Kind == RD.Kind && Opcode == RD.Opcode; + } +}; +} // namespace + +static Optional<ReductionData> getReductionData(Instruction *I) { + Value *L, *R; + if (m_BinOp(m_Value(L), m_Value(R)).match(I)) + return ReductionData(RK_Arithmetic, I->getOpcode(), L, R); + if (auto *SI = dyn_cast<SelectInst>(I)) { + if (m_SMin(m_Value(L), m_Value(R)).match(SI) || + m_SMax(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMin(m_Value(L), m_Value(R)).match(SI) || + m_OrdFMax(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMin(m_Value(L), m_Value(R)).match(SI) || + m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast<CmpInst>(SI->getCondition()); + return ReductionData(RK_MinMax, CI->getOpcode(), L, R); + } + if (m_UMin(m_Value(L), m_Value(R)).match(SI) || + m_UMax(m_Value(L), m_Value(R)).match(SI)) { + auto *CI = cast<CmpInst>(SI->getCondition()); + return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R); + } + } + return llvm::None; +} + +static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, + unsigned Level, + unsigned NumLevels) { + // Match one level of pairwise operations. + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (!I) + return RK_None; + + assert(I->getType()->isVectorTy() && "Expecting a vector type"); + + Optional<ReductionData> RD = getReductionData(I); + if (!RD) + return RK_None; + + ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS); + if (!LS && Level) + return RK_None; + ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS); + if (!RS && Level) + return RK_None; + + // On level 0 we can omit one shufflevector instruction. + if (!Level && !RS && !LS) + return RK_None; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; + Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; + Value *NextLevelOp = nullptr; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return RK_None; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. + // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != RD->RHS) + return RK_None; + else if (NextLevelOpR && NextLevelOpR != RD->LHS) + return RK_None; + + NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS; + } else + return RK_None; + + // Check that the next levels binary operation exists and matches with the + // current one. + if (Level + 1 != NumLevels) { + Optional<ReductionData> NextLevelRD = + getReductionData(cast<Instruction>(NextLevelOp)); + if (!NextLevelRD || !RD->hasSameData(*NextLevelRD)) + return RK_None; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level)) + return RK_None; + } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) { + if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level)) + return RK_None; + } else { + return RK_None; + } + + if (++Level == NumLevels) + return RD->Kind; + + // Match next level. + return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level, + NumLevels); +} + +static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional<ReductionData> RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = RdxStart->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree. + // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) == + RK_None) + return RK_None; + + Opcode = RD->Opcode; + Ty = VecTy; + + return RD->Kind; +} + +static std::pair<Value *, ShuffleVectorInst *> +getShuffleAndOtherOprd(Value *L, Value *R) { + ShuffleVectorInst *S = nullptr; + + if ((S = dyn_cast<ShuffleVectorInst>(L))) + return std::make_pair(R, S); + + S = dyn_cast<ShuffleVectorInst>(R); + return std::make_pair(L, S); +} + +static ReductionKind +matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return RK_None; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return RK_None; + + auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0)); + if (!RdxStart) + return RK_None; + Optional<ReductionData> RD = getReductionData(RdxStart); + if (!RD) + return RK_None; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return RK_None; + + // We look for a sequence of shuffles and adds like the following matching one + // fadd, shuffle vector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Instruction *RdxOp = RdxStart; + SmallVector<int, 32> ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + if (!RdxOp) + return RK_None; + Optional<ReductionData> RDLevel = getReductionData(RdxOp); + if (!RDLevel || !RDLevel->hasSameData(*RD)) + return RK_None; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + std::tie(NextRdxOp, Shuffle) = + getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS); + + // Check the current reduction operation and the shuffle use the same value. + if (Shuffle == nullptr) + return RK_None; + if (Shuffle->getOperand(0) != NextRdxOp) + return RK_None; + + // Check that shuffle masks matches. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef. + std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + if (ShuffleMask != Mask) + return RK_None; + + RdxOp = dyn_cast<Instruction>(NextRdxOp); + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RD->Opcode; + Ty = VecTy; + return RD->Kind; +} + +int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const { + switch (I->getOpcode()) { + case Instruction::GetElementPtr: + return getUserCost(I); + + case Instruction::Ret: + case Instruction::PHI: + case Instruction::Br: { + return getCFInstrCost(I->getOpcode()); + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + SmallVector<const Value*, 2> Operands(I->operand_values()); + return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK, TargetTransformInfo::OP_None, + TargetTransformInfo::OP_None, + Operands); + } + case Instruction::Select: { + const SelectInst *SI = cast<SelectInst>(I); + Type *CondTy = SI->getCondition()->getType(); + return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I); + } + case Instruction::ICmp: + case Instruction::FCmp: { + Type *ValTy = I->getOperand(0)->getType(); + return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I); + } + case Instruction::Store: { + const StoreInst *SI = cast<StoreInst>(I); + Type *ValTy = SI->getValueOperand()->getType(); + return getMemoryOpCost(I->getOpcode(), ValTy, + SI->getAlignment(), + SI->getPointerAddressSpace(), I); + } + case Instruction::Load: { + const LoadInst *LI = cast<LoadInst>(I); + return getMemoryOpCost(I->getOpcode(), I->getType(), + LI->getAlignment(), + LI->getPointerAddressSpace(), I); + } + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::FPExt: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::SIToFP: + case Instruction::UIToFP: + case Instruction::Trunc: + case Instruction::FPTrunc: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: { + Type *SrcTy = I->getOperand(0)->getType(); + return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I); + } + case Instruction::ExtractElement: { + const ExtractElementInst * EEI = cast<ExtractElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by a extractelement). + unsigned ReduxOpCode; + Type *ReduxType; + + switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/false); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/false, /*IsUnsigned=*/true); + case RK_None: + break; + } + + switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) { + case RK_Arithmetic: + return getArithmeticReductionCost(ReduxOpCode, ReduxType, + /*IsPairwiseForm=*/true); + case RK_MinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/false); + case RK_UnsignedMinMax: + return getMinMaxReductionCost( + ReduxType, CmpInst::makeCmpResultType(ReduxType), + /*IsPairwiseForm=*/true, /*IsUnsigned=*/true); + case RK_None: + break; + } + + return getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); + } + case Instruction::InsertElement: { + const InsertElementInst * IE = cast<InsertElementInst>(I); + ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); + Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size()) { + if (isReverseVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, + 0, nullptr); + if (isAlternateVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Alternate, + VecTypOp0, 0, nullptr); + + if (isZeroEltBroadcastVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_Broadcast, + VecTypOp0, 0, nullptr); + + if (isSingleSourceVectorMask(Mask)) + return getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, + VecTypOp0, 0, nullptr); + + return getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + VecTypOp0, 0, nullptr); + } + + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { + SmallVector<Value *, 4> Args(II->arg_operands()); + + FastMathFlags FMF; + if (auto *FPMO = dyn_cast<FPMathOperator>(II)) + FMF = FPMO->getFastMathFlags(); + + return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Args, FMF); + } + return -1; + default: + // We don't have any information on this instruction. + return -1; + } +} + TargetTransformInfo::Concept::~Concept() {} TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {} |