summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp8
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp21
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp9
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp20
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h4
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp21
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h10
10 files changed, 86 insertions, 29 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9eb63e2c711..98806551e4d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6084,8 +6084,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// FADD -> FMA combines:
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
@@ -6161,8 +6161,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// FSUB -> FMA combines:
if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
DAG.getTarget().Options.UnsafeFPMath) &&
- DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
- TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+ DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+ (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dcde5ac3b89..2a1ded04b48 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4922,7 +4922,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::fmuladd: {
EVT VT = TLI->getValueType(I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
- TLI->isFMAFasterThanMulAndAdd(VT)) {
+ TLI->isFMAFasterThanFMulAndFAdd(VT)) {
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 84051d40eca..1fa1edba190 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2798,6 +2798,27 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+bool
+AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ case MVT::f128:
+ return false;
+ default:
+ break;
+ }
+
+ return false;
+}
+
AArch64TargetLowering::ConstraintType
AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
if (Constraint.size() == 1) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 901a9bec28a..320346e60b7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -229,11 +229,11 @@ public:
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
- /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
- /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
- /// is expanded to mul + add.
- virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
ConstraintType getConstraintType(const std::string &Constraint) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index cf41c02e749..812f096cdd6 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -7809,18 +7809,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 4801a415ca4..776ad2a75ff 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -459,11 +459,11 @@ namespace llvm {
/// relative to software emulation.
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
- /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
- /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
- /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
- /// is expanded to mul + add.
- virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index b1abc2c3c10..d344134b24f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -255,6 +255,26 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm)
MaxStoresPerMemsetOptSize = 0;
}
+bool
+SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ case MVT::f128:
+ return false;
+ default:
+ break;
+ }
+
+ return false;
+}
+
bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
return Imm.isZero() || Imm.isNegZero();
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4ddfcbbda05..88e1fa7d746 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -129,9 +129,7 @@ public:
virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
return MVT::i32;
}
- virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
- return true;
- }
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a680ac09b5f..f00df3543a8 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12966,6 +12966,27 @@ bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
return false;
}
+bool
+X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+ return false;
+
+ VT = VT.getScalarType();
+
+ if (!VT.isSimple())
+ return false;
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f32:
+ case MVT::f64:
+ return true;
+ default:
+ break;
+ }
+
+ return false;
+}
+
bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
return !(VT1 == MVT::i32 && VT2 == MVT::i16);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0e5e822e6bf..8317824b84d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -646,11 +646,11 @@ namespace llvm {
virtual bool isZExtFree(EVT VT1, EVT VT2) const;
virtual bool isZExtFree(SDValue Val, EVT VT2) const;
- /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
- /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
- /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
- /// is expanded to mul + add.
- virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
/// isNarrowingProfitable - Return true if it's profitable to narrow
/// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
OpenPOWER on IntegriCloud