| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 04:03:35 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-22 04:03:35 +0000 |
| commit | 46e6b7adef59d96f30c173ac6ade57d81d7c7b1e (patch) | |
| tree | a465151a03a5ee3da1a74ceda2adb59d52dab9d6 /llvm | |
| parent | 770ec8680a0a1404fa7dc524ae2861ffdc5901eb (diff) | |
| download | bcm5719-llvm-46e6b7adef59d96f30c173ac6ade57d81d7c7b1e.tar.gz bcm5719-llvm-46e6b7adef59d96f30c173ac6ade57d81d7c7b1e.zip | |
AMDGPU: Check fast math flags in fadd/fsub combines
llvm-svn: 290312
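In brief, this change passes the two nodes being combined into `getFusedOpcode()`, so the fadd/fsub combines form an FMA not only under the global options (`-ffp-contract=fast` or unsafe FP math) but also when *both* nodes carry the per-instruction unsafe-algebra fast-math flag. The standalone C++ sketch below models that decision outside of LLVM; `GlobalOptions`, `NodeFlags`, and `shouldUseFMA` are illustrative names for this sketch, not LLVM API.

```cpp
// Standalone model of the check added to SITargetLowering::getFusedOpcode().
// Not LLVM API: GlobalOptions, NodeFlags, and shouldUseFMA are hypothetical.
#include <iostream>

struct GlobalOptions {
  bool AllowFPOpFusionFast; // e.g. -ffp-contract=fast
  bool UnsafeFPMath;        // e.g. -enable-unsafe-fp-math
};

struct NodeFlags {
  bool UnsafeAlgebra; // per-instruction 'fast' flag carried on the node
};

// Fuse the pair of nodes into an FMA only if the global options permit it,
// or if *both* nodes involved in the combine carry the unsafe-algebra flag,
// and an FMA is actually faster than separate fmul/fadd on the subtarget.
bool shouldUseFMA(const GlobalOptions &Opts, const NodeFlags &N0,
                  const NodeFlags &N1, bool FMAIsFasterThanMulAdd) {
  bool FlagsAllowFusion = Opts.AllowFPOpFusionFast || Opts.UnsafeFPMath ||
                          (N0.UnsafeAlgebra && N1.UnsafeAlgebra);
  return FlagsAllowFusion && FMAIsFasterThanMulAdd;
}

int main() {
  GlobalOptions Strict{false, false};

  // Only one of the two fadds is 'fast': no fusion from per-node flags alone.
  // (The new *_fast_add0/_fast_add1 tests expect two v_add_f64 under the
  // strict run lines.)
  std::cout << shouldUseFMA(Strict, {true}, {false}, true) << '\n'; // 0

  // Both fadds are 'fast': fuse to an FMA even without global options.
  // (The new fadd_a_a_b_f64_fast test expects v_fma_f64.)
  std::cout << shouldUseFMA(Strict, {true}, {true}, true) << '\n'; // 1
}
```

This mirrors the new tests: with only one of the two fadds marked `fast`, the strict run lines still expect two `v_add_f64`; with both marked `fast`, `v_fma_f64` is expected.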
Diffstat (limited to 'llvm')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 19 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll | 63 |
3 files changed, 78 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 52cc0428e28..9a0002d9b0d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3871,7 +3871,11 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
   return SDValue();
 }
 
-unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, EVT VT) const {
+unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
+                                          const SDNode *N0,
+                                          const SDNode *N1) const {
+  EVT VT = N0->getValueType(0);
+
   // Only do this if we are not trying to support denormals. v_mad_f32 does not
   // support denormals ever.
   if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
@@ -3879,7 +3883,10 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG, EVT VT) const
     return ISD::FMAD;
 
   const TargetOptions &Options = DAG.getTarget().Options;
-  if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+  if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||
+       Options.UnsafeFPMath ||
+       (cast<BinaryWithFlagsSDNode>(N0)->Flags.hasUnsafeAlgebra() &&
+        cast<BinaryWithFlagsSDNode>(N1)->Flags.hasUnsafeAlgebra())) &&
       isFMAFasterThanFMulAndFAdd(VT)) {
     return ISD::FMA;
   }
@@ -3907,7 +3914,7 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
   if (LHS.getOpcode() == ISD::FADD) {
     SDValue A = LHS.getOperand(0);
     if (A == LHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
       if (FusedOp != 0) {
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, Two, A, RHS);
@@ -3919,7 +3926,7 @@ SDValue SITargetLowering::performFAddCombine(SDNode *N,
   if (RHS.getOpcode() == ISD::FADD) {
     SDValue A = RHS.getOperand(0);
     if (A == RHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
       if (FusedOp != 0) {
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, Two, A, LHS);
@@ -3951,7 +3958,7 @@ SDValue SITargetLowering::performFSubCombine(SDNode *N,
     // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
     SDValue A = LHS.getOperand(0);
     if (A == LHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
       if (FusedOp != 0){
         const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
         SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
@@ -3966,7 +3973,7 @@ SDValue SITargetLowering::performFSubCombine(SDNode *N,
     SDValue A = RHS.getOperand(0);
 
     if (A == RHS.getOperand(1)) {
-      unsigned FusedOp = getFusedOpcode(DAG, VT);
+      unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
       if (FusedOp != 0){
         const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
         return DAG.getNode(FusedOp, SL, VT, NegTwo, A, LHS);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index d7127193f05..9583f6db6fa 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -83,7 +83,8 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  unsigned getFusedOpcode(const SelectionDAG &DAG, EVT VT) const;
+  unsigned getFusedOpcode(const SelectionDAG &DAG,
+                          const SDNode *N0, const SDNode *N1) const;
   SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
index fe209e34dd1..0af44ef200e 100644
--- a/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmuladd.f64.ll
@@ -112,6 +112,69 @@ define void @mad_sub_f64(double addrspace(1)* noalias nocapture %out, double add
   ret void
 }
 
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add0:
+; GCN-STRICT: v_add_f64
+; GCN-STRICT: v_add_f64
+
+; GCN-CONTRACT: v_fma_f64
+define void @fadd_a_a_b_f64_fast_add0(double addrspace(1)* %out,
+                                      double addrspace(1)* %in1,
+                                      double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd fast double %r0, %r0
+  %add.1 = fadd double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast_add1:
+; GCN-STRICT: v_add_f64
+; GCN-STRICT: v_add_f64
+
+; GCN-CONTRACT: v_fma_f64
+define void @fadd_a_a_b_f64_fast_add1(double addrspace(1)* %out,
+                                      double addrspace(1)* %in1,
+                                      double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd double %r0, %r0
+  %add.1 = fadd fast double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
+; GCN-LABEL: {{^}}fadd_a_a_b_f64_fast:
+; GCN: v_fma_f64
+define void @fadd_a_a_b_f64_fast(double addrspace(1)* %out,
+                                 double addrspace(1)* %in1,
+                                 double addrspace(1)* %in2) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+  %gep.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
+  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
+  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid
+
+  %r0 = load volatile double, double addrspace(1)* %gep.0
+  %r1 = load volatile double, double addrspace(1)* %gep.1
+
+  %add.0 = fadd fast double %r0, %r0
+  %add.1 = fadd fast double %add.0, %r1
+  store double %add.1, double addrspace(1)* %gep.out
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 declare double @llvm.fmuladd.f64(double, double, double) #1

