diff options
| author | Nate Begeman <natebegeman@mac.com> | 2005-12-14 22:54:33 +0000 |
|---|---|---|
| committer | Nate Begeman <natebegeman@mac.com> | 2005-12-14 22:54:33 +0000 |
| commit | e37cb604c15c1ef5e7083865360e9d964985ff2b (patch) | |
| tree | 536fb13a9b817f15bc56accc07953ea25b20ee02 /llvm | |
| parent | 7853983b40255787b1637e4d0c925ea042418d3b (diff) | |
| download | bcm5719-llvm-e37cb604c15c1ef5e7083865360e9d964985ff2b.tar.gz bcm5719-llvm-e37cb604c15c1ef5e7083865360e9d964985ff2b.zip | |
Use the new predicate support that Evan Cheng added to remove some code
from the DAGToDAG cpp file. This adds pattern support for vector and
scalar fma, which passes test/Regression/CodeGen/PowerPC/fma.ll, and
does the right thing in the presence of -disable-excess-fp-precision.
Allows us to match:
void %foo(<4 x float> * %a) {
entry:
%tmp1 = load <4 x float> * %a;
%tmp2 = mul <4 x float> %tmp1, %tmp1
%tmp3 = add <4 x float> %tmp2, %tmp1
store <4 x float> %tmp3, <4 x float> *%a
ret void
}
As:
_foo:
li r2, 0
lvx v0, r2, r3
vmaddfp v0, v0, v0, v0
stvx v0, r2, r3
blr
Or, with llc -disable-excess-fp-precision,
_foo:
li r2, 0
lvx v0, r2, r3
vxor v1, v1, v1
vmaddfp v1, v0, v0, v1
vaddfp v0, v1, v0
stvx v0, r2, r3
blr
llvm-svn: 24719
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 47 | ||||
| -rw-r--r-- | llvm/lib/Target/PowerPC/PPCInstrInfo.td | 34 |
2 files changed, 24 insertions, 57 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 01d089d9b91..f666f232556 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -885,53 +885,6 @@ SDOperand PPCDAGToDAGISel::Select(SDOperand Op) { CurDAG->getTargetFrameIndex(FI, MVT::i32), getI32Imm(0)); } - case ISD::FADD: { - MVT::ValueType Ty = N->getValueType(0); - if (!NoExcessFPPrecision) { // Match FMA ops - if (N->getOperand(0).getOpcode() == ISD::FMUL && - N->getOperand(0).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS, - Ty, Select(N->getOperand(0).getOperand(0)), - Select(N->getOperand(0).getOperand(1)), - Select(N->getOperand(1))); - } else if (N->getOperand(1).getOpcode() == ISD::FMUL && - N->getOperand(1).hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMADD :PPC::FMADDS, - Ty, Select(N->getOperand(1).getOperand(0)), - Select(N->getOperand(1).getOperand(1)), - Select(N->getOperand(0))); - } - } - - // Other cases are autogenerated. - break; - } - case ISD::FSUB: { - MVT::ValueType Ty = N->getValueType(0); - - if (!NoExcessFPPrecision) { // Match FMA ops - if (N->getOperand(0).getOpcode() == ISD::FMUL && - N->getOperand(0).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ? PPC::FMSUB:PPC::FMSUBS, - Ty, Select(N->getOperand(0).getOperand(0)), - Select(N->getOperand(0).getOperand(1)), - Select(N->getOperand(1))); - } else if (N->getOperand(1).getOpcode() == ISD::FMUL && - N->getOperand(1).Val->hasOneUse()) { - ++FusedFP; // Statistic - return CurDAG->SelectNodeTo(N, Ty == MVT::f64 ?PPC::FNMSUB:PPC::FNMSUBS, - Ty, Select(N->getOperand(1).getOperand(0)), - Select(N->getOperand(1).getOperand(1)), - Select(N->getOperand(0))); - } - } - - // Other cases are autogenerated. 
- break; - } case ISD::SDIV: { // FIXME: since this depends on the setting of the carry flag from the srawi // we should really be making notes about that for the scheduler. diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index 04011e8513f..4ec8fe3346d 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -168,7 +168,7 @@ def crbitm: Operand<i8> { //===----------------------------------------------------------------------===// // PowerPC Instruction Predicate Definitions. -def FPContractions : Predicate<"!NoExcessFPPrecision">; +def FPContractions : Predicate<"!NoExcessFPPrecision">; //===----------------------------------------------------------------------===// // PowerPC Instruction Definitions. @@ -746,22 +746,26 @@ def FNMADD : AForm_1<63, 31, (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmadd $FRT, $FRA, $FRC, $FRB", FPFused, [(set F8RC:$FRT, (fneg (fadd (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>; + F8RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMADDS : AForm_1<59, 31, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmadds $FRT, $FRA, $FRC, $FRB", FPGeneral, [(set F4RC:$FRT, (fneg (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>; + F4RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMSUB : AForm_1<63, 30, (ops F8RC:$FRT, F8RC:$FRA, F8RC:$FRC, F8RC:$FRB), "fnmsub $FRT, $FRA, $FRC, $FRB", FPFused, [(set F8RC:$FRT, (fneg (fsub (fmul F8RC:$FRA, F8RC:$FRC), - F8RC:$FRB)))]>; + F8RC:$FRB)))]>, + Requires<[FPContractions]>; def FNMSUBS : AForm_1<59, 30, (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), "fnmsubs $FRT, $FRA, $FRC, $FRB", FPGeneral, [(set F4RC:$FRT, (fneg (fsub (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB)))]>; + F4RC:$FRB)))]>, + Requires<[FPContractions]>; // FSEL is artificially split into 4 and 8-byte forms for the result. 
To avoid // having 4 of these, force the comparison to always be an 8-byte double (code // should use an FMRSD if the input comparison value really wants to be a float) @@ -848,12 +852,14 @@ def RLDICR : MDForm_1<30, 1, def VMADDFP : VAForm_1<46, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), "vmaddfp $vD, $vA, $vC, $vB", VecFP, [(set VRRC:$vD, (fadd (fmul VRRC:$vA, VRRC:$vC), - VRRC:$vB))]>; + VRRC:$vB))]>, + Requires<[FPContractions]>; def VNMSUBFP: VAForm_1<47, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB, VRRC:$vC), - "vnmsubfp $vD, $vA, $vC, $vB", VecFP, - [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA, - VRRC:$vC), - VRRC:$vB)))]>; + "vnmsubfp $vD, $vA, $vC, $vB", VecFP, + [(set VRRC:$vD, (fneg (fsub (fmul VRRC:$vA, + VRRC:$vC), + VRRC:$vB)))]>, + Requires<[FPContractions]>; // VX-Form instructions. AltiVec arithmetic ops. def VADDFP : VXForm_1<10, (ops VRRC:$vD, VRRC:$vA, VRRC:$vB), @@ -971,6 +977,14 @@ def : Pat<(add GPRC:$in, (PPChi tconstpool:$g, 0)), def : Pat<(fmul VRRC:$vA, VRRC:$vB), (VMADDFP VRRC:$vA, (V_SET0), VRRC:$vB)>; +// Fused negative multiply subtract, alternate pattern +def : Pat<(fsub F8RC:$B, (fmul F8RC:$A, F8RC:$C)), + (FNMSUB F8RC:$A, F8RC:$C, F8RC:$B)>, + Requires<[FPContractions]>; +def : Pat<(fsub F4RC:$B, (fmul F4RC:$A, F4RC:$C)), + (FNMSUBS F4RC:$A, F4RC:$C, F4RC:$B)>, + Requires<[FPContractions]>; + // Fused multiply add and multiply sub for packed float. These are represented // separately from the real instructions above, for operations that must have // the additional precision, such as Newton-Rhapson (used by divide, sqrt) |

