summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp13
-rw-r--r--llvm/test/CodeGen/X86/sad.ll2
2 files changed, 10 insertions, 5 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8bc83f15038..a29b6a8283c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43647,10 +43647,13 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
// The output of PSADBW is a vector of i64.
// We need to turn the vector of i64 into a vector of i32.
// If the reduction vector is at least as wide as the psadbw result, just
- // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
- // anyway.
+ // bitcast. If it's narrower which can only occur for v2i32, bits 127:16 of
+ // the PSADBW will be zero. If we promote/ narrow vectors, truncate the v2i64
+ // result to v2i32 which will be removed by type legalization. If we/ widen
+ // narrow vectors then we bitcast to v4i32 and extract v2i32.
MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
- if (VT.getSizeInBits() >= ResVT.getSizeInBits())
+ if (ExperimentalVectorWideningLegalization ||
+ VT.getSizeInBits() >= ResVT.getSizeInBits())
Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
else
Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
@@ -43660,6 +43663,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
SDValue Zero = DAG.getConstant(0, DL, VT);
Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
DAG.getIntPtrConstant(0, DL));
+ } else if (ExperimentalVectorWideningLegalization &&
+ VT.getSizeInBits() < ResVT.getSizeInBits()) {
+ Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
+ DAG.getIntPtrConstant(0, DL));
}
// Preserve the reduction flag on the ADD. We may need to revisit for the
diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll
index 648f9b44cab..918841a1864 100644
--- a/llvm/test/CodeGen/X86/sad.ll
+++ b/llvm/test/CodeGen/X86/sad.ll
@@ -1074,7 +1074,6 @@ define i32 @sad_2i8() nounwind {
; SSE2-NEXT: pand %xmm1, %xmm3
; SSE2-NEXT: pand %xmm1, %xmm2
; SSE2-NEXT: psadbw %xmm3, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: addq $4, %rax
; SSE2-NEXT: jne .LBB3_1
@@ -1097,7 +1096,6 @@ define i32 @sad_2i8() nounwind {
; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0],xmm0[1,2,3,4,5,6,7]
; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3,4,5,6,7]
; AVX-NEXT: vpsadbw %xmm3, %xmm2, %xmm2
-; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; AVX-NEXT: vpaddd %xmm1, %xmm2, %xmm1
; AVX-NEXT: addq $4, %rax
; AVX-NEXT: jne .LBB3_1
OpenPOWER on IntegriCloud