diff options
-rw-r--r-- | llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vec_fneg.ll | 32 |
4 files changed, 19 insertions, 37 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index d482b539a9d..28d27b7a459 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V); bool isBitwiseNot(SDValue V); /// Returns the SDNode if it is a constant splat BuildVector or constant int. -ConstantSDNode *isConstOrConstSplat(SDValue N); +ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false); /// Returns the SDNode if it is a constant splat BuildVector or constant float. -ConstantFPSDNode *isConstOrConstSplatFP(SDValue N); +ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false); class GlobalAddressSDNode : public SDNode { friend class SelectionDAG; diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3872f2d0142..ec7f63a3d3f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); - ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return DAG.getConstantFP(0.0f, DL, VT); } - // (fsub 0, B) -> -B + // (fsub -0.0, N1) -> -N1 if (N0CFP && N0CFP->isZero()) { - if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a8843314ef3..0f8bd080867 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) { return C && C->isAllOnesValue(); } -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); // BuildVectors can truncate their operands. Ignore that case here. - // FIXME: We blindly ignore splats which include undef which is overly - // pessimistic. - if (CN && UndefElements.none() && + if (CN && (UndefElements.none() || AllowUndefs) && CN->getValueType(0) == N.getValueType().getScalarType()) return CN; } @@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) { return nullptr; } -ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) { +ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) return CN; if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - - if (CN && UndefElements.none()) + if (CN && (UndefElements.none() || AllowUndefs)) return CN; } diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll index e9dc88ea433..e84f7163bbe 100644 --- a/llvm/test/CodeGen/X86/vec_fneg.ll +++ b/llvm/test/CodeGen/X86/vec_fneg.ll @@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind { define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) { ; X32-SSE-LABEL: fneg_undef_elts_v4f32: ; X32-SSE: # %bb.0: -; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0> -; X32-SSE-NEXT: subps %xmm0, %xmm1 -; X32-SSE-NEXT: movaps %xmm1, %xmm0 +; X32-SSE-NEXT: xorps {{\.LCPI.*}}, %xmm0 ; X32-SSE-NEXT: retl ; ; X64-SSE-LABEL: fneg_undef_elts_v4f32: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0> -; X64-SSE-NEXT: subps %xmm0, %xmm1 -; X64-SSE-NEXT: movaps %xmm1, %xmm0 +; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0 ; X64-SSE-NEXT: retq %r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x ret <4 x float> %r @@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) { ; This isn't fneg, but similarly check that (X - 0.0) is simplified. define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) { -; X32-SSE1-LABEL: fsub0_undef_elts_v4f32: -; X32-SSE1: # %bb.0: -; X32-SSE1-NEXT: retl -; -; X32-SSE2-LABEL: fsub0_undef_elts_v4f32: -; X32-SSE2: # %bb.0: -; X32-SSE2-NEXT: xorps %xmm1, %xmm1 -; X32-SSE2-NEXT: subps %xmm1, %xmm0 -; X32-SSE2-NEXT: retl -; -; X64-SSE1-LABEL: fsub0_undef_elts_v4f32: -; X64-SSE1: # %bb.0: -; X64-SSE1-NEXT: retq +; X32-SSE-LABEL: fsub0_undef_elts_v4f32: +; X32-SSE: # %bb.0: +; X32-SSE-NEXT: retl ; -; X64-SSE2-LABEL: fsub0_undef_elts_v4f32: -; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: xorps %xmm1, %xmm1 -; X64-SSE2-NEXT: subps %xmm1, %xmm0 -; X64-SSE2-NEXT: retq +; X64-SSE-LABEL: fsub0_undef_elts_v4f32: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: retq %r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef> ret <4 x float> %r } |