diff options
| -rw-r--r-- | llvm/include/llvm/Target/TargetOptions.h | 16 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 43 | ||||
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/ARM/fusedMAC.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/a2-fp-basic.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/fma.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll | 2 | ||||
| -rw-r--r-- | llvm/tools/llc/llc.cpp | 6 | 
9 files changed, 60 insertions, 17 deletions
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index bc60673589f..84287fb5d76 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -35,7 +35,7 @@ namespace llvm {      TargetOptions()          : PrintMachineCode(false), NoFramePointerElim(false),            NoFramePointerElimNonLeaf(false), LessPreciseFPMADOption(false), -          NoExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false), +          AllowExcessFPPrecision(false), UnsafeFPMath(false), NoInfsFPMath(false),            NoNaNsFPMath(false), HonorSignDependentRoundingFPMathOption(false),            UseSoftFloat(false), NoZerosInBSS(false), JITExceptionHandling(false),            JITEmitDebugInfo(false), JITEmitDebugInfoToDisk(false), @@ -74,13 +74,13 @@ namespace llvm {      unsigned LessPreciseFPMADOption : 1;      bool LessPreciseFPMAD() const; -    /// NoExcessFPPrecision - This flag is enabled when the -    /// -disable-excess-fp-precision flag is specified on the command line. -    /// When this flag is off (the default), the code generator is allowed to -    /// produce results that are "more precise" than IEEE allows.  This includes -    /// use of FMA-like operations and use of the X86 FP registers without -    /// rounding all over the place. -    unsigned NoExcessFPPrecision : 1; +    /// AllowExcessFPPrecision - This flag is enabled when the +    /// -enable-excess-fp-precision flag is specified on the command line. This +    /// flag is OFF by default. When it is turned on, the code generator is +    /// allowed to produce results that are "more precise" than IEEE allows. +    /// This includes use of FMA-like operations and use of the X86 FP registers +    /// without rounding all over the place. +    unsigned AllowExcessFPPrecision : 1;      /// UnsafeFPMath - This flag is enabled when the      /// -enable-unsafe-fp-math flag is specified on the command line.  
When diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 369d927112f..3517b7cfbe3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5633,6 +5633,26 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {                         DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,                                     N0.getOperand(1), N1)); +  // FADD -> FMA combines: +  if ((DAG.getTarget().Options.AllowExcessFPPrecision || +       DAG.getTarget().Options.UnsafeFPMath) && +      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && +      TLI.isOperationLegal(ISD::FMA, VT)) { + +    // fold (fadd (fmul x, y), z) -> (fma x, y, z) +    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { +      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, +                         N0.getOperand(0), N0.getOperand(1), N1); +    } +   +    // fold (fadd x, (fmul y, z)) -> (fma y, z, x) +    // Note: Commutes FADD operands. 
+    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { +      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, +                         N1.getOperand(0), N1.getOperand(1), N0); +    } +  } +    return SDValue();  } @@ -5690,6 +5710,29 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {      }    } +  // FSUB -> FMA combines: +  if ((DAG.getTarget().Options.AllowExcessFPPrecision || +       DAG.getTarget().Options.UnsafeFPMath) && +      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && +      TLI.isOperationLegal(ISD::FMA, VT)) { + +    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) +    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { +      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, +                         N0.getOperand(0), N0.getOperand(1), +                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1)); +    } + +    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) +    // Note: Commutes FSUB operands. +    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { +      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, +                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, +                         N1.getOperand(0)), +                         N1.getOperand(1), N0); +    } +  } +    return SDValue();  } diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td index 5131152d1e8..81e3527a6f0 100644 --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -236,7 +236,7 @@ def UseFPVMLx        : Predicate<"Subtarget->useFPVMLx()">;  // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.  // But only select them if more precision in FP computation is allowed.  // Do not use them for Darwin platforms. 
-def UseFusedMAC      : Predicate<"!TM.Options.NoExcessFPPrecision && " +def UseFusedMAC      : Predicate<"TM.Options.AllowExcessFPPrecision && "                                   "!Subtarget->isTargetDarwin()">;  def DontUseFusedMAC  : Predicate<"!Subtarget->hasVFP4() || "                                   "Subtarget->isTargetDarwin()">; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c9cdd5c1920..25b6dc733ea 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -353,7 +353,7 @@ def xaddroff : ComplexPattern<iPTR, 1, "SelectAddrIdxOffs", [], []>;  //===----------------------------------------------------------------------===//  // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!TM.Options.NoExcessFPPrecision">; +def FPContractions : Predicate<"TM.Options.AllowExcessFPPrecision">;  def In32BitMode  : Predicate<"!PPCSubTarget.isPPC64()">;  def In64BitMode  : Predicate<"PPCSubTarget.isPPC64()">;  def IsBookE  : Predicate<"PPCSubTarget.isBookE()">; diff --git a/llvm/test/CodeGen/ARM/fusedMAC.ll b/llvm/test/CodeGen/ARM/fusedMAC.ll index 3bf1ef4ad26..61e7d7b1a21 100644 --- a/llvm/test/CodeGen/ARM/fusedMAC.ll +++ b/llvm/test/CodeGen/ARM/fusedMAC.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 | FileCheck %s +; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -enable-excess-fp-precision | FileCheck %s  ; Check generated fused MAC and MLS.  
define double @fusedMACTest1(double %d1, double %d2, double %d3) { diff --git a/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll b/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll index 932ad7a63ce..a47e662cc87 100644 --- a/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll +++ b/llvm/test/CodeGen/PowerPC/a2-fp-basic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 -mcpu=a2 | FileCheck %s +; RUN: llc < %s -march=ppc64 -mcpu=a2 -enable-excess-fp-precision | FileCheck %s  %0 = type { double, double } diff --git a/llvm/test/CodeGen/PowerPC/fma.ll b/llvm/test/CodeGen/PowerPC/fma.ll index 815c72c1f8a..02847147edb 100644 --- a/llvm/test/CodeGen/PowerPC/fma.ll +++ b/llvm/test/CodeGen/PowerPC/fma.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 | \ +; RUN: llc < %s -march=ppc32 -enable-excess-fp-precision | \  ; RUN:   egrep {fn?madd|fn?msub} | count 8  define double @test_FMADD1(double %A, double %B, double %C) { diff --git a/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll b/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll index 1fad2fa3aaf..25ec5f892c5 100644 --- a/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll +++ b/llvm/test/CodeGen/PowerPC/ppc440-fp-basic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mcpu=440 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mcpu=440 -enable-excess-fp-precision | FileCheck %s  %0 = type { double, double } diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp index d0880913baf..b303cec3b51 100644 --- a/llvm/tools/llc/llc.cpp +++ b/llvm/tools/llc/llc.cpp @@ -156,8 +156,8 @@ DisableFPElimNonLeaf("disable-non-leaf-fp-elim",    cl::init(false));  static cl::opt<bool> -DisableExcessPrecision("disable-excess-fp-precision", -  cl::desc("Disable optimizations that may increase FP precision"), +EnableExcessPrecision("enable-excess-fp-precision", +  cl::desc("Enable optimizations that may increase FP precision"),    cl::init(false));  static cl::opt<bool> @@ -404,7 +404,7 @@ int main(int argc, char **argv) {    Options.LessPreciseFPMADOption = EnableFPMAD;    
Options.NoFramePointerElim = DisableFPElim;    Options.NoFramePointerElimNonLeaf = DisableFPElimNonLeaf; -  Options.NoExcessFPPrecision = DisableExcessPrecision; +  Options.AllowExcessFPPrecision = EnableExcessPrecision;    Options.UnsafeFPMath = EnableUnsafeFPMath;    Options.NoInfsFPMath = EnableNoInfsFPMath;    Options.NoNaNsFPMath = EnableNoNaNsFPMath;  | 

