-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp                31
-rw-r--r--   llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll   38
2 files changed, 32 insertions(+), 37 deletions(-)
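The change below teaches isFNEG to bail out when peeking through bitcasts changes the vector element size, and makes combineFMADDSUB reject a negated operand whose type no longer matches; as the test updates show, the scalar rounding FMA intrinsics then fold the XOR-based negation directly into vfmsub/vfnmadd instructions. As a minimal, standalone sketch (plain C++, not LLVM code) of why the element-size guard matters: XOR with a sign-bit mask only behaves as FNEG when the mask's element width matches the floating-point element width.

  // Sketch: flipping the f64 sign bit (bit 63) negates the value, while
  // flipping bit 31 -- the sign bit of an i32 lane -- lands in the f64
  // mantissa, so an i32-wide sign-mask XOR is not an f64 negation.
  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static double flipBit(double X, unsigned Bit) {
    uint64_t Bits;
    std::memcpy(&Bits, &X, sizeof(Bits)); // bitcast f64 -> i64
    Bits ^= (uint64_t{1} << Bit);         // xor with a one-bit "sign mask"
    std::memcpy(&X, &Bits, sizeof(Bits)); // bitcast back to f64
    return X;
  }

  int main() {
    double X = 42.5;
    assert(flipBit(X, 63) == -X); // i64-wide sign mask: exactly FNEG
    assert(flipBit(X, 31) != -X); // i32-wide sign mask: not an f64 FNEG
    return 0;
  }

This is the situation the new ScalarSize comparison rejects: once a bitcast changes the element width, an XOR with per-element sign masks can no longer be treated as a negation of the original value.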
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3ddbe8377c..b2159b1b945 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40635,16 +40635,23 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (N->getOpcode() == ISD::FNEG)
return N->getOperand(0);
+ unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();
+
SDValue Op = peekThroughBitcasts(SDValue(N, 0));
- auto VT = Op->getValueType(0);
+ EVT VT = Op->getValueType(0);
+ // Make sure the element size doesn't change.
+ if (VT.getScalarSizeInBits() != ScalarSize)
+ return SDValue();
+
if (auto SVOp = dyn_cast<ShuffleVectorSDNode>(Op.getNode())) {
// For a VECTOR_SHUFFLE(VEC1, VEC2), if the VEC2 is undef, then the negate
// of this is VECTOR_SHUFFLE(-VEC1, UNDEF). The mask can be anything here.
if (!SVOp->getOperand(1).isUndef())
return SDValue();
if (SDValue NegOp0 = isFNEG(DAG, SVOp->getOperand(0).getNode()))
- return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
- SVOp->getMask());
+ if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
+ return DAG.getVectorShuffle(VT, SDLoc(SVOp), NegOp0, DAG.getUNDEF(VT),
+ SVOp->getMask());
return SDValue();
}
unsigned Opc = Op.getOpcode();
@@ -40656,19 +40663,17 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
if (!InsVector.isUndef())
return SDValue();
if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode()))
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
- NegInsVal, Op.getOperand(2));
+ if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
+ NegInsVal, Op.getOperand(2));
return SDValue();
}
if (Opc != X86ISD::FXOR && Opc != ISD::XOR && Opc != ISD::FSUB)
return SDValue();
- SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
- if (!Op1.getValueType().isFloatingPoint())
- return SDValue();
-
- SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op0 = Op.getOperand(0);
// For XOR and FXOR, we want to check if constant bits of Op1 are sign bit
// masks. For FSUB, we have to check if constant bits of Op0 are sign bit
@@ -40680,7 +40685,7 @@ static SDValue isFNEG(SelectionDAG &DAG, SDNode *N) {
SmallVector<APInt, 16> EltBits;
// Extract constant bits and see if they are all sign bit masks. Ignore the
// undef elements.
- if (getTargetConstantBitsFromNode(Op1, Op1.getScalarValueSizeInBits(),
+ if (getTargetConstantBitsFromNode(Op1, ScalarSize,
UndefElts, EltBits,
/* AllowWholeUndefs */ true,
/* AllowPartialUndefs */ false)) {
@@ -41706,6 +41711,10 @@ static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
if (!NegVal)
return SDValue();
+ // FIXME: Should we bitcast instead?
+ if (NegVal.getValueType() != VT)
+ return SDValue();
+
unsigned NewOpcode;
switch (N->getOpcode()) {
default: llvm_unreachable("Unexpected opcode!");
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
index f9c899555dd..d7611d1de0a 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
@@ -5763,16 +5763,14 @@ define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %
; X86-LABEL: test_mm_mask_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__W, i64 0
@@ -5817,16 +5815,14 @@ define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double>
; X86-LABEL: test_mm_maskz_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm2
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm2
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vfmsub213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__A, i64 0
@@ -5874,19 +5870,15 @@ define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double
; X86-LABEL: test_mm_mask3_fmsub_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm2, %xmm3
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X86-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask3_fmsub_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm2, %xmm3
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm3, %xmm0, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
+; X64-NEXT: vfmsub231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X64-NEXT: vmovapd %xmm2, %xmm0
; X64-NEXT: retq
entry:
@@ -5933,16 +5925,14 @@ define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext
; X86-LABEL: test_mm_mask_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
+; X64-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__W, i64 0
@@ -5987,16 +5977,14 @@ define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double>
; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X86-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT: retl
;
; X64-LABEL: test_mm_maskz_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
+; X64-NEXT: vfnmadd213sd {rn-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = extractelement <2 x double> %__A, i64 0
@@ -6044,17 +6032,15 @@ define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x doubl
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86: # %bb.0: # %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-NEXT: vxorpd {{\.LCPI.*}}, %xmm1, %xmm1
; X86-NEXT: kmovw %eax, %k1
-; X86-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X86-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X86-NEXT: vmovapd %xmm2, %xmm0
; X86-NEXT: retl
;
; X64-LABEL: test_mm_mask3_fnmadd_round_sd:
; X64: # %bb.0: # %entry
-; X64-NEXT: vxorpd {{.*}}(%rip), %xmm1, %xmm1
; X64-NEXT: kmovw %edi, %k1
-; X64-NEXT: vfmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; X64-NEXT: vfnmadd231sd {rn-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; X64-NEXT: vmovapd %xmm2, %xmm0
; X64-NEXT: retq
entry: