author     Michael Berg <michael_c_berg@apple.com>  2018-06-18 23:44:59 +0000
committer  Michael Berg <michael_c_berg@apple.com>  2018-06-18 23:44:59 +0000
commit     7b993d762f75c89e6e9a433a3b58125ed535ca36
tree       b0a8999ac7d073d28113e4b1435eeb4db898d030 /llvm
parent     87f0f95e4c5fd702d4c03a53de1d5d8d071b8946
Utilize new SDNode flag functionality to expand current support for fadd
Summary: This patch originated from D46562; it is a proper subset of that change, with some issues addressed.
Reviewers: spatel, hfinkel, wristow, arsenm, javed.absar
Reviewed By: spatel
Subscribers: wdng, nhaehnle
Differential Revision: https://reviews.llvm.org/D47909
llvm-svn: 334996
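The core of the change: the fadd folds in DAGCombiner::visitFADD are now gated on the per-node fast-math flags (nsz, nnan, reassoc) as well as the global UnsafeFPMath option. A minimal LLVM IR sketch of the folds this enables; the function names are illustrative only and do not appear in the patch:

define float @fold_negzero(float %x) {
  ; fadd A, -0.0 --> A is always legal; no flags required.
  %r = fadd float %x, -0.0
  ret float %r
}

define float @fold_poszero(float %x) {
  ; fadd A, +0.0 --> A needs nsz (or unsafe-fp-math): if A is -0.0,
  ; the IEEE-754 sum is +0.0, so dropping the add changes the sign bit.
  %r = fadd nsz float %x, 0.0
  ret float %r
}

define float @fold_cancellation(float %x) {
  ; fadd (fneg A), A --> 0.0 needs nnan (or unsafe-fp-math); it is wrong
  ; when A is NaN or infinity.
  %neg = fsub nsz float 0.0, %x
  %r = fadd nnan float %neg, %x
  ret float %r
}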
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 26
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18
-rw-r--r--  llvm/test/CodeGen/AArch64/fadd-combines.ll     |  7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fadd.ll               |  2
-rw-r--r--  llvm/test/CodeGen/X86/fmf-flags.ll             | 17
-rw-r--r--  llvm/test/CodeGen/X86/fp-fold.ll               | 31
6 files changed, 50 insertions(+), 51 deletions(-)
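The diff below makes two coordinated code changes and updates the tests to match: visitFADD in DAGCombiner.cpp grows the flag-based guards, and the early unsafe-math-only zero/one folds in SelectionDAG::getNode are removed so the flag-aware combines apply uniformly. As one concrete case, the constant merging pinned down by the new fadd_reassociate test requires both reassoc and nsz on the final add; a sketch under those flags (not taken from the patch):

define float @merge_constants(float %x) {
  ; (x + 8.0) + 12.0 --> x + 20.0 once reassoc+nsz permit reassociation.
  %sum = fadd float %x, 8.0
  %r = fadd reassoc nsz float %sum, 12.0
  ret float %r
}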
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index b66b2f8730a..b3f5531e987 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10327,20 +10327,21 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
   }
 
-  // FIXME: Auto-upgrade the target/function-level option.
-  if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
-    // fold (fadd A, 0) -> A
-    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
-      if (N1C->isZero())
-        return N0;
+  ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1);
+  if (N1C && N1C->isZero()) {
+    if (N1C->isNegative() || Options.UnsafeFPMath ||
+        Flags.hasNoSignedZeros()) {
+      // fold (fadd A, 0) -> A
+      return N0;
+    }
   }
 
   // No FP constant should be created after legalization as Instruction
   // Selection pass has a hard time dealing with FP constants.
   bool AllowNewConst = (Level < AfterLegalizeDAG);
 
-  // TODO: fmf test for NaNs could be done here too
-  if (Options.UnsafeFPMath && AllowNewConst) {
+  // If 'unsafe math' or nnan is enabled, fold lots of things.
+  if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
     // If allowed, fold (fadd (fneg x), x) -> 0.0
     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
       return DAG.getConstantFP(0.0, DL, VT);
@@ -10350,9 +10351,12 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
       return DAG.getConstantFP(0.0, DL, VT);
   }
 
-  // If 'unsafe math' is enabled, fold lots of things.
-  // TODO: fmf testing for reassoc/nsz could be done here too
-  if (Options.UnsafeFPMath && AllowNewConst) {
+  // If 'unsafe math' or reassoc and nsz, fold lots of things.
+  // TODO: break out portions of the transformations below for which Unsafe is
+  //       considered and which do not require both nsz and reassoc
+  if ((Options.UnsafeFPMath ||
+       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+      AllowNewConst) {
     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
     if (N1CFP && N0.getOpcode() == ISD::FADD &&
         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d227c4a0efa..4fdd3de0ebc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4442,24 +4442,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   case ISD::FMUL:
   case ISD::FDIV:
   case ISD::FREM:
-    if (getTarget().Options.UnsafeFPMath) {
-      if (Opcode == ISD::FADD) {
-        // x+0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FSUB) {
-        // x-0 --> x
-        if (N2CFP && N2CFP->getValueAPF().isZero())
-          return N1;
-      } else if (Opcode == ISD::FMUL) {
-        // x*0 --> 0
-        if (N2CFP && N2CFP->isZero())
-          return N2;
-        // x*1 --> x
-        if (N2CFP && N2CFP->isExactlyValue(1.0))
-          return N1;
-      }
-    }
     assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
     assert(N1.getValueType() == N2.getValueType() &&
            N1.getValueType() == VT && "Binary operator types must match!");
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
index 3c5524f0919..be027a7b558 100644
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -100,11 +100,12 @@ define float @fadd_const_multiuse_fmf(float %x) {
 ; CHECK-LABEL: fadd_const_multiuse_fmf:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI7_0
+; CHECK-NEXT:    adrp x9, .LCPI7_1
 ; CHECK-NEXT:    ldr s1, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT:    fadd s0, s0, s1
-; CHECK-NEXT:    fmov s1, #17.00000000
+; CHECK-NEXT:    ldr s2, [x9, :lo12:.LCPI7_1]
 ; CHECK-NEXT:    fadd s1, s0, s1
-; CHECK-NEXT:    fadd s0, s0, s1
+; CHECK-NEXT:    fadd s0, s0, s2
+; CHECK-NEXT:    fadd s0, s1, s0
 ; CHECK-NEXT:    ret
   %a1 = fadd float %x, 42.0
   %a2 = fadd nsz reassoc float %a1, 17.0
diff --git a/llvm/test/CodeGen/AMDGPU/fadd.ll b/llvm/test/CodeGen/AMDGPU/fadd.ll
index a2f1f7195f2..c08fd357217 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.ll
@@ -66,7 +66,7 @@ define amdgpu_kernel void @fadd_v8f32(<8 x float> addrspace(1)* %out, <8 x float
 ; FUNC-LABEL: {{^}}fadd_0_nsz_attr_f32:
 ; SI-NOT: v_add_f32
 define amdgpu_kernel void @fadd_0_nsz_attr_f32(float addrspace(1)* %out, float %a) #1 {
-  %add = fadd float %a, 0.0
+  %add = fadd nsz float %a, 0.0
   store float %add, float addrspace(1)* %out, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/fmf-flags.ll b/llvm/test/CodeGen/X86/fmf-flags.ll
index d958378d4f7..4fb2040b338 100644
--- a/llvm/test/CodeGen/X86/fmf-flags.ll
+++ b/llvm/test/CodeGen/X86/fmf-flags.ll
@@ -32,18 +32,13 @@ declare float @llvm.fmuladd.f32(float %a, float %b, float %c);
 define float @fast_fmuladd_opts(float %a , float %b , float %c) {
 ; X64-LABEL: fast_fmuladd_opts:
 ; X64:       # %bb.0:
-; X64-NEXT:    movaps %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    addss %xmm0, %xmm1
-; X64-NEXT:    movaps %xmm1, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_fmuladd_opts:
 ; X86:       # %bb.0:
 ; X86-NEXT:    flds {{[0-9]+}}(%esp)
-; X86-NEXT:    fld %st(0)
-; X86-NEXT:    fadd %st(1)
-; X86-NEXT:    faddp %st(1)
+; X86-NEXT:    fmuls {{.*}}
 ; X86-NEXT:    retl
   %res = call fast float @llvm.fmuladd.f32(float %a, float 2.0, float %a)
   ret float %res
@@ -56,9 +51,9 @@ define float @fast_fmuladd_opts(float %a , float %b , float %c) {
 define double @not_so_fast_mul_add(double %x) {
 ; X64-LABEL: not_so_fast_mul_add:
 ; X64:       # %bb.0:
-; X64-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; X64-NEXT:    movsd {{.*}}(%rip), %xmm1
 ; X64-NEXT:    mulsd %xmm0, %xmm1
-; X64-NEXT:    addsd %xmm1, %xmm0
+; X64-NEXT:    mulsd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    movsd %xmm1, {{.*}}(%rip)
 ; X64-NEXT:    retq
 ;
@@ -67,7 +62,9 @@ define double @not_so_fast_mul_add(double %x) {
 ; X86-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X86-NEXT:    fld %st(0)
 ; X86-NEXT:    fmull {{\.LCPI.*}}
-; X86-NEXT:    fadd %st(0), %st(1)
+; X86-NEXT:    fxch %st(1)
+; X86-NEXT:    fmull {{\.LCPI.*}}
+; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fstpl mul1
 ; X86-NEXT:    retl
   %m = fmul double %x, 4.2
diff --git a/llvm/test/CodeGen/X86/fp-fold.ll b/llvm/test/CodeGen/X86/fp-fold.ll
index 34354436c6e..b8e30a4ce42 100644
--- a/llvm/test/CodeGen/X86/fp-fold.ll
+++ b/llvm/test/CodeGen/X86/fp-fold.ll
@@ -17,18 +17,33 @@ define float @fadd_zero(float %x) {
 }
 
 define float @fadd_negzero(float %x) {
-; STRICT-LABEL: fadd_negzero:
-; STRICT:       # %bb.0:
-; STRICT-NEXT:    addss {{.*}}(%rip), %xmm0
-; STRICT-NEXT:    retq
-;
-; UNSAFE-LABEL: fadd_negzero:
-; UNSAFE:       # %bb.0:
-; UNSAFE-NEXT:    retq
+; ANY-LABEL: fadd_negzero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    retq
   %r = fadd float %x, -0.0
   ret float %r
 }
 
+define float @fadd_produce_zero(float %x) {
+; ANY-LABEL: fadd_produce_zero:
+; ANY:       # %bb.0:
+; ANY-NEXT:    xorps %xmm0, %xmm0
+; ANY-NEXT:    retq
+  %neg = fsub nsz float 0.0, %x
+  %r = fadd nnan float %neg, %x
+  ret float %r
+}
+
+define float @fadd_reassociate(float %x) {
+; ANY-LABEL: fadd_reassociate:
+; ANY:       # %bb.0:
+; ANY-NEXT:    addss {{.*}}(%rip), %xmm0
+; ANY-NEXT:    retq
+  %sum = fadd float %x, 8.0
+  %r = fadd reassoc nsz float %sum, 12.0
+  ret float %r
+}
+
 define float @fadd_negzero_nsz(float %x) {
 ; ANY-LABEL: fadd_negzero_nsz:
 ; ANY:       # %bb.0:
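For contrast, a hypothetical strict-mode case (not part of the patch) that must keep the add even after this change, in the spirit of the retained fadd_zero test in fp-fold.ll:

define float @no_fold_poszero(float %x) {
  ; Without nsz this must not fold to %x: if %x is -0.0, the result is +0.0.
  %r = fadd float %x, 0.0
  ret float %r
}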