summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 20
-rw-r--r-- llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll 15
2 files changed, 30 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fd7a60d7d04..762ad72d0be 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5269,11 +5269,17 @@ static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
if (i * 2 < NumElts) {
- if (V0.getOpcode() == ISD::UNDEF)
+ if (V0.getOpcode() == ISD::UNDEF) {
V0 = Op0.getOperand(0);
+ if (V0.getValueType() != VT)
+ return false;
+ }
} else {
- if (V1.getOpcode() == ISD::UNDEF)
+ if (V1.getOpcode() == ISD::UNDEF) {
V1 = Op0.getOperand(0);
+ if (V1.getValueType() != VT)
+ return false;
+ }
if (i * 2 == NumElts)
ExpectedVExtractIdx = BaseIdx;
}
@@ -5423,10 +5429,16 @@ static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG,
SubFound = true;
// Update InVec0 and InVec1.
- if (InVec0.getOpcode() == ISD::UNDEF)
+ if (InVec0.getOpcode() == ISD::UNDEF) {
InVec0 = Op0.getOperand(0);
- if (InVec1.getOpcode() == ISD::UNDEF)
+ if (InVec0.getValueType() != VT)
+ return SDValue();
+ }
+ if (InVec1.getOpcode() == ISD::UNDEF) {
InVec1 = Op1.getOperand(0);
+ if (InVec1.getValueType() != VT)
+ return SDValue();
+ }
// Make sure that operands in input to each add/sub node always
// come from a same pair of vectors.
diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 5b2de28c0f5..71efa3f8f10 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -315,4 +315,17 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) {
; CHECK-NOT: addsubps
; CHECK: ret
-
+define <2 x float> @test_v2f32(<2 x float> %v0, <2 x float> %v1) {
+ %v2 = extractelement <2 x float> %v0, i32 0
+ %v3 = extractelement <2 x float> %v1, i32 0
+ %v4 = extractelement <2 x float> %v0, i32 1
+ %v5 = extractelement <2 x float> %v1, i32 1
+ %sub = fsub float %v2, %v3
+ %add = fadd float %v5, %v4
+ %res0 = insertelement <2 x float> undef, float %sub, i32 0
+ %res1 = insertelement <2 x float> %res0, float %add, i32 1
+ ret <2 x float> %res1
+}
+; CHECK-LABEL: test_v2f32
+; CHECK: addsubps %xmm1, %xmm0
+; CHECK-NEXT: retq
OpenPOWER on IntegriCloud