diff options
| author | Amara Emerson <amara.emerson@arm.com> | 2017-05-09 10:43:25 +0000 |
|---|---|---|
| committer | Amara Emerson <amara.emerson@arm.com> | 2017-05-09 10:43:25 +0000 |
| commit | cf9daa33a7870c235e0edc176dd40579f376cafc (patch) | |
| tree | 4df699a6f02c81cbbc2c7c4639c299a0dea5632c /llvm/include | |
| parent | b7bf386e8098aed73f0b9b2df40067afc07dffab (diff) | |
| download | bcm5719-llvm-cf9daa33a7870c235e0edc176dd40579f376cafc.tar.gz bcm5719-llvm-cf9daa33a7870c235e0edc176dd40579f376cafc.zip | |
Introduce experimental generic intrinsics for horizontal vector reductions.
- This change allows targets to opt-in to using them instead of the log2
shufflevector algorithm.
- The SLP and Loop vectorizers have the common code to do shuffle reductions
factored out into LoopUtils, and now have a unified interface for generating
reductions regardless of the preference of the target. LoopUtils now uses TTI
to determine what kind of reductions the target wants to handle.
- For CodeGen, basic legalization support is added.
Differential Revision: https://reviews.llvm.org/D30086
llvm-svn: 302514
Diffstat (limited to 'llvm/include')
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfo.h | 19 | ||||
| -rw-r--r-- | llvm/include/llvm/Analysis/TargetTransformInfoImpl.h | 6 | ||||
| -rw-r--r-- | llvm/include/llvm/CodeGen/ISDOpcodes.h | 14 | ||||
| -rw-r--r-- | llvm/include/llvm/IR/IRBuilder.h | 39 | ||||
| -rw-r--r-- | llvm/include/llvm/IR/Intrinsics.td | 44 | ||||
| -rw-r--r-- | llvm/include/llvm/Transforms/Utils/LoopUtils.h | 26 |
6 files changed, 148 insertions, 0 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index b9639dba188..a769d5f67dd 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -740,6 +740,19 @@ public: unsigned ChainSizeInBytes, VectorType *VecTy) const; + /// Flags describing the kind of vector reduction. + struct ReductionFlags { + ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} + bool IsMaxOp; ///< If the op a min/max kind, true if it's a max operation. + bool IsSigned; ///< Whether the operation is a signed int reduction. + bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present. + }; + + /// \returns True if the target wants to handle the given reduction idiom in + /// the intrinsics form instead of the shuffle form. + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const; + /// @} private: @@ -895,6 +908,8 @@ public: virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const = 0; + virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags) const = 0; }; template <typename T> @@ -1200,6 +1215,10 @@ public: VectorType *VecTy) const override { return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); } + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + ReductionFlags Flags) const override { + return Impl.useReductionIntrinsic(Opcode, Ty, Flags); + } }; template <typename T> diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index d7fda9e14b0..83b975e94f6 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -456,6 +456,12 @@ public: VectorType *VecTy) const { return VF; } + + bool useReductionIntrinsic(unsigned Opcode, Type *Ty, + TTI::ReductionFlags Flags) const { + return false; + } + protected: // Obtain the minimum required size to hold the value (without the sign) // In case of a vector it returns the min required size for one element. diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h index ca0f3fbad89..2bc218f0aec 100644 --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -783,6 +783,20 @@ namespace ISD { /// known nonzero constant. The only operand here is the chain. GET_DYNAMIC_AREA_OFFSET, + /// Generic reduction nodes. These nodes represent horizontal vector + /// reduction operations, producing a scalar result. + /// The STRICT variants perform reductions in sequential order. The first + /// operand is an initial scalar accumulator value, and the second operand + /// is the vector to reduce. + VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL, + /// These reductions are non-strict, and have a single vector operand. + VECREDUCE_FADD, VECREDUCE_FMUL, + VECREDUCE_ADD, VECREDUCE_MUL, + VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR, + VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN, + /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants. + VECREDUCE_FMAX, VECREDUCE_FMIN, + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index bc689f3b01d..9d4c13c29f6 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -454,6 +454,45 @@ public: MDNode *ScopeTag = nullptr, MDNode *NoAliasTag = nullptr); + /// \brief Create a vector fadd reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value for ordered reductions. + CallInst *CreateFAddReduce(Value *Acc, Value *Src); + + /// \brief Create a vector fmul reduction intrinsic of the source vector. + /// The first parameter is a scalar accumulator value for ordered reductions. + CallInst *CreateFMulReduce(Value *Acc, Value *Src); + + /// \brief Create a vector int add reduction intrinsic of the source vector. + CallInst *CreateAddReduce(Value *Src); + + /// \brief Create a vector int mul reduction intrinsic of the source vector. + CallInst *CreateMulReduce(Value *Src); + + /// \brief Create a vector int AND reduction intrinsic of the source vector. + CallInst *CreateAndReduce(Value *Src); + + /// \brief Create a vector int OR reduction intrinsic of the source vector. + CallInst *CreateOrReduce(Value *Src); + + /// \brief Create a vector int XOR reduction intrinsic of the source vector. + CallInst *CreateXorReduce(Value *Src); + + /// \brief Create a vector integer max reduction intrinsic of the source + /// vector. + CallInst *CreateIntMaxReduce(Value *Src, bool IsSigned = false); + + /// \brief Create a vector integer min reduction intrinsic of the source + /// vector. + CallInst *CreateIntMinReduce(Value *Src, bool IsSigned = false); + + /// \brief Create a vector float max reduction intrinsic of the source + /// vector. + CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false); + + /// \brief Create a vector float min reduction intrinsic of the source + /// vector. + CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false); + /// \brief Create a lifetime.start intrinsic. /// /// If the pointer isn't i8* it will be converted. diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 7b78d4d3d34..19f6045568f 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -812,6 +812,50 @@ def int_memcpy_element_atomic : Intrinsic<[], [IntrArgMemOnly, NoCapture<0>, NoCapture<1>, WriteOnly<0>, ReadOnly<1>]>; +//===------------------------ Reduction Intrinsics ------------------------===// +// +def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyfloat_ty, + llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; +def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty], + [llvm_anyvector_ty], + [IntrNoMem]>; + //===----- Intrinsics that are used to provide predicate information -----===// def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>], diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index a1cf41d6f93..94d10c98eb0 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" @@ -42,6 +43,7 @@ class PredIteratorCache; class ScalarEvolution; class SCEV; class TargetLibraryInfo; +class TargetTransformInfo; /// \brief Captures loop safety information. /// It keep information for loop & its header may throw exception. @@ -489,6 +491,30 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, LoopSafetyInfo *SafetyInfo, OptimizationRemarkEmitter *ORE = nullptr); +/// Create a target reduction of the given vector. The reduction operation +/// is described by the \p Opcode parameter. min/max reductions require +/// additional information supplied in \p Flags. +/// The target is queried to determine if intrinsics or shuffle sequences are +/// required to implement the reduction. +Value * +createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, + unsigned Opcode, Value *Src, + TargetTransformInfo::ReductionFlags Flags = + TargetTransformInfo::ReductionFlags(), + ArrayRef<Value *> RedOps = ArrayRef<Value *>()); + +/// Create a generic target reduction using a recurrence descriptor \p Desc +/// The target is queried to determine if intrinsics or shuffle sequences are +/// required to implement the reduction. +Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, + RecurrenceDescriptor &Desc, Value *Src, + bool NoNaN = false); + +/// Get the intersection (logical and) of all of the potential IR flags +/// of each scalar operation (VL) that will be converted into a vector (I). +/// Flag set: NSW, NUW, exact, and all of fast-math. +void propagateIRFlags(Value *I, ArrayRef<Value *> VL); + } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H |

