summaryrefslogtreecommitdiffstats
path: root/llvm/include
diff options
context:
space:
mode:
authorAmara Emerson <amara.emerson@arm.com>2017-05-09 10:43:25 +0000
committerAmara Emerson <amara.emerson@arm.com>2017-05-09 10:43:25 +0000
commitcf9daa33a7870c235e0edc176dd40579f376cafc (patch)
tree4df699a6f02c81cbbc2c7c4639c299a0dea5632c /llvm/include
parentb7bf386e8098aed73f0b9b2df40067afc07dffab (diff)
downloadbcm5719-llvm-cf9daa33a7870c235e0edc176dd40579f376cafc.tar.gz
bcm5719-llvm-cf9daa33a7870c235e0edc176dd40579f376cafc.zip
Introduce experimental generic intrinsics for horizontal vector reductions.
- This change allows targets to opt-in to using them instead of the log2 shufflevector algorithm. - The SLP and Loop vectorizers have the common code to do shuffle reductions factored out into LoopUtils, and now have a unified interface for generating reductions regardless of the preference of the target. LoopUtils now uses TTI to determine what kind of reductions the target wants to handle. - For CodeGen, basic legalization support is added. Differential Revision: https://reviews.llvm.org/D30086 llvm-svn: 302514
Diffstat (limited to 'llvm/include')
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h19
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h6
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h14
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h39
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td44
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopUtils.h26
6 files changed, 148 insertions, 0 deletions
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index b9639dba188..a769d5f67dd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -740,6 +740,19 @@ public:
unsigned ChainSizeInBytes,
VectorType *VecTy) const;
+ /// Flags describing the kind of vector reduction.
+ struct ReductionFlags {
+ ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
+ bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
+ bool IsSigned; ///< Whether the operation is a signed int reduction.
+ bool NoNaN; ///< If op is an fp min/max, whether NaNs can be assumed absent.
+ };
+
+ /// \returns True if the target wants to handle the given reduction idiom in
+ /// the intrinsics form instead of the shuffle form.
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const;
+
/// @}
private:
@@ -895,6 +908,8 @@ public:
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
+ virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ ReductionFlags) const = 0;
};
template <typename T>
@@ -1200,6 +1215,10 @@ public:
VectorType *VecTy) const override {
return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const override {
+ return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d7fda9e14b0..83b975e94f6 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -456,6 +456,12 @@ public:
VectorType *VecTy) const {
return VF;
}
+
+ bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
+ TTI::ReductionFlags Flags) const {
+ return false;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ca0f3fbad89..2bc218f0aec 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -783,6 +783,20 @@ namespace ISD {
/// known nonzero constant. The only operand here is the chain.
GET_DYNAMIC_AREA_OFFSET,
+ /// Generic reduction nodes. These nodes represent horizontal vector
+ /// reduction operations, producing a scalar result.
+ /// The STRICT variants perform reductions in sequential order. The first
+ /// operand is an initial scalar accumulator value, and the second operand
+ /// is the vector to reduce.
+ VECREDUCE_STRICT_FADD, VECREDUCE_STRICT_FMUL,
+ /// These reductions are non-strict, and have a single vector operand.
+ VECREDUCE_FADD, VECREDUCE_FMUL,
+ VECREDUCE_ADD, VECREDUCE_MUL,
+ VECREDUCE_AND, VECREDUCE_OR, VECREDUCE_XOR,
+ VECREDUCE_SMAX, VECREDUCE_SMIN, VECREDUCE_UMAX, VECREDUCE_UMIN,
+ /// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
+ VECREDUCE_FMAX, VECREDUCE_FMIN,
+
/// BUILTIN_OP_END - This must be the last enum value in this list.
/// The target-specific pre-isel opcode values start here.
BUILTIN_OP_END
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index bc689f3b01d..9d4c13c29f6 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -454,6 +454,45 @@ public:
MDNode *ScopeTag = nullptr,
MDNode *NoAliasTag = nullptr);
+ /// \brief Create a vector fadd reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value for ordered reductions.
+ CallInst *CreateFAddReduce(Value *Acc, Value *Src);
+
+ /// \brief Create a vector fmul reduction intrinsic of the source vector.
+ /// The first parameter is a scalar accumulator value for ordered reductions.
+ CallInst *CreateFMulReduce(Value *Acc, Value *Src);
+
+ /// \brief Create a vector int add reduction intrinsic of the source vector.
+ CallInst *CreateAddReduce(Value *Src);
+
+ /// \brief Create a vector int mul reduction intrinsic of the source vector.
+ CallInst *CreateMulReduce(Value *Src);
+
+ /// \brief Create a vector int AND reduction intrinsic of the source vector.
+ CallInst *CreateAndReduce(Value *Src);
+
+ /// \brief Create a vector int OR reduction intrinsic of the source vector.
+ CallInst *CreateOrReduce(Value *Src);
+
+ /// \brief Create a vector int XOR reduction intrinsic of the source vector.
+ CallInst *CreateXorReduce(Value *Src);
+
+ /// \brief Create a vector integer max reduction intrinsic of the source
+ /// vector.
+ CallInst *CreateIntMaxReduce(Value *Src, bool IsSigned = false);
+
+ /// \brief Create a vector integer min reduction intrinsic of the source
+ /// vector.
+ CallInst *CreateIntMinReduce(Value *Src, bool IsSigned = false);
+
+ /// \brief Create a vector float max reduction intrinsic of the source
+ /// vector.
+ CallInst *CreateFPMaxReduce(Value *Src, bool NoNaN = false);
+
+ /// \brief Create a vector float min reduction intrinsic of the source
+ /// vector.
+ CallInst *CreateFPMinReduce(Value *Src, bool NoNaN = false);
+
/// \brief Create a lifetime.start intrinsic.
///
/// If the pointer isn't i8* it will be converted.
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 7b78d4d3d34..19f6045568f 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -812,6 +812,50 @@ def int_memcpy_element_atomic : Intrinsic<[],
[IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
WriteOnly<0>, ReadOnly<1>]>;
+//===------------------------ Reduction Intrinsics ------------------------===//
+//
+def int_experimental_vector_reduce_fadd : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyfloat_ty,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_fmul : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyfloat_ty,
+ llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_add : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_mul : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_and : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_or : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_xor : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_smax : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_smin : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_umax : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_umin : Intrinsic<[llvm_anyint_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_fmax : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+def int_experimental_vector_reduce_fmin : Intrinsic<[llvm_anyfloat_ty],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
//===----- Intrinsics that are used to provide predicate information -----===//
def int_ssa_copy : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index a1cf41d6f93..94d10c98eb0 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
@@ -42,6 +43,7 @@ class PredIteratorCache;
class ScalarEvolution;
class SCEV;
class TargetLibraryInfo;
+class TargetTransformInfo;
/// \brief Captures loop safety information.
/// It keeps information about whether the loop or its header may throw an exception.
@@ -489,6 +491,30 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
LoopSafetyInfo *SafetyInfo,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Create a target reduction of the given vector. The reduction operation
+/// is described by the \p Opcode parameter. min/max reductions require
+/// additional information supplied in \p Flags.
+/// The target is queried to determine if intrinsics or shuffle sequences are
+/// required to implement the reduction.
+Value *
+createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
+ unsigned Opcode, Value *Src,
+ TargetTransformInfo::ReductionFlags Flags =
+ TargetTransformInfo::ReductionFlags(),
+ ArrayRef<Value *> RedOps = ArrayRef<Value *>());
+
+/// Create a generic target reduction using a recurrence descriptor \p Desc.
+/// The target is queried to determine if intrinsics or shuffle sequences are
+/// required to implement the reduction.
+Value *createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI,
+ RecurrenceDescriptor &Desc, Value *Src,
+ bool NoNaN = false);
+
+/// Get the intersection (logical and) of all of the potential IR flags
+/// of each scalar operation (VL) that will be converted into a vector (I).
+/// Flag set: NSW, NUW, exact, and all of fast-math.
+void propagateIRFlags(Value *I, ArrayRef<Value *> VL);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
OpenPOWER on IntegriCloud