summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/SelectionDAGNodes.h 4
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 9
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 11
-rw-r--r-- llvm/test/CodeGen/X86/vec_fneg.ll 32
4 files changed, 19 insertions, 37 deletions
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index d482b539a9d..28d27b7a459 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1602,10 +1602,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
bool isBitwiseNot(SDValue V);
/// Returns the SDNode if it is a constant splat BuildVector or constant int.
-ConstantSDNode *isConstOrConstSplat(SDValue N);
+ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false);
/// Returns the SDNode if it is a constant splat BuildVector or constant float.
-ConstantFPSDNode *isConstOrConstSplatFP(SDValue N);
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);
class GlobalAddressSDNode : public SDNode {
friend class SelectionDAG;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3872f2d0142..ec7f63a3d3f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11011,8 +11011,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue DAGCombiner::visitFSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
- ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
+ ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
EVT VT = N->getValueType(0);
SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11044,9 +11044,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getConstantFP(0.0f, DL, VT);
}
- // (fsub 0, B) -> -B
+ // (fsub -0.0, N1) -> -N1
if (N0CFP && N0CFP->isZero()) {
- if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) {
+ if (N0CFP->isNegative() ||
+ (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index a8843314ef3..0f8bd080867 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8211,7 +8211,7 @@ bool llvm::isBitwiseNot(SDValue V) {
return C && C->isAllOnesValue();
}
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8220,9 +8220,7 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
// BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
+ if (CN && (UndefElements.none() || AllowUndefs) &&
CN->getValueType(0) == N.getValueType().getScalarType())
return CN;
}
@@ -8230,15 +8228,14 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
return nullptr;
}
-ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
return CN;
if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
BitVector UndefElements;
ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
-
- if (CN && UndefElements.none())
+ if (CN && (UndefElements.none() || AllowUndefs))
return CN;
}
diff --git a/llvm/test/CodeGen/X86/vec_fneg.ll b/llvm/test/CodeGen/X86/vec_fneg.ll
index e9dc88ea433..e84f7163bbe 100644
--- a/llvm/test/CodeGen/X86/vec_fneg.ll
+++ b/llvm/test/CodeGen/X86/vec_fneg.ll
@@ -102,16 +102,12 @@ define <2 x float> @fneg_bitcast(i64 %i) nounwind {
define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
; X32-SSE-LABEL: fneg_undef_elts_v4f32:
; X32-SSE: # %bb.0:
-; X32-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X32-SSE-NEXT: subps %xmm0, %xmm1
-; X32-SSE-NEXT: movaps %xmm1, %xmm0
+; X32-SSE-NEXT: xorps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT: retl
;
; X64-SSE-LABEL: fneg_undef_elts_v4f32:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movaps {{.*#+}} xmm1 = <-0,u,u,-0>
-; X64-SSE-NEXT: subps %xmm0, %xmm1
-; X64-SSE-NEXT: movaps %xmm1, %xmm0
+; X64-SSE-NEXT: xorps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT: retq
%r = fsub <4 x float> <float -0.0, float undef, float undef, float -0.0>, %x
ret <4 x float> %r
@@ -120,25 +116,13 @@ define <4 x float> @fneg_undef_elts_v4f32(<4 x float> %x) {
; This isn't fneg, but similarly check that (X - 0.0) is simplified.
define <4 x float> @fsub0_undef_elts_v4f32(<4 x float> %x) {
-; X32-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE1: # %bb.0:
-; X32-SSE1-NEXT: retl
-;
-; X32-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X32-SSE2: # %bb.0:
-; X32-SSE2-NEXT: xorps %xmm1, %xmm1
-; X32-SSE2-NEXT: subps %xmm1, %xmm0
-; X32-SSE2-NEXT: retl
-;
-; X64-SSE1-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE1: # %bb.0:
-; X64-SSE1-NEXT: retq
+; X32-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X32-SSE: # %bb.0:
+; X32-SSE-NEXT: retl
;
-; X64-SSE2-LABEL: fsub0_undef_elts_v4f32:
-; X64-SSE2: # %bb.0:
-; X64-SSE2-NEXT: xorps %xmm1, %xmm1
-; X64-SSE2-NEXT: subps %xmm1, %xmm0
-; X64-SSE2-NEXT: retq
+; X64-SSE-LABEL: fsub0_undef_elts_v4f32:
+; X64-SSE: # %bb.0:
+; X64-SSE-NEXT: retq
%r = fsub <4 x float> %x, <float 0.0, float undef, float 0.0, float undef>
ret <4 x float> %r
}
OpenPOWER on IntegriCloud