diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-08-23 05:33:27 +0000 | 
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-08-23 05:33:27 +0000 | 
| commit | bdceb9fb14595d10f7d94e1dd950cf2d94d2f2d3 (patch) | |
| tree | 569be268bd103e431359badc31b7aae4ba3911b6 /llvm/lib/Target | |
| parent | 8798c8de9a8a4abf597cd370da57ebefaa4d951d (diff) | |
| download | bcm5719-llvm-bdceb9fb14595d10f7d94e1dd950cf2d94d2f2d3.tar.gz bcm5719-llvm-bdceb9fb14595d10f7d94e1dd950cf2d94d2f2d3.zip | |
[X86] Improve lowering of v2i32 SAD handling in combineLoopSADPattern.
For v2i32 we only feed 2 i8 elements into the psadbw instruction,
with 0s in the other 14 bytes. The resulting psadbw instruction
will produce zeros in bits [127:16] of the output. We need to take
the result and feed it to a v2i32 add where the first element
includes bits [15:0] of the sad result. The other element should
be zero.
Prior to this patch we were using a truncate to take the zero from
bits [95:64] of the psadbw output. This results in a pshufd to move
those bits to [63:32]. But since we also have zeroes in bits [63:32] of
the psadbw output, we should just take those bits.
The previous code probably worked better with promoting legalization,
but now we use widening legalization. I've preserved the old
behavior if -x86-experimental-vector-widening-legalization=false
until we get that option removed.
llvm-svn: 369733
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 | 
1 files changed, 10 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8bc83f15038..a29b6a8283c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -43647,10 +43647,13 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
   // The output of PSADBW is a vector of i64.
   // We need to turn the vector of i64 into a vector of i32.
   // If the reduction vector is at least as wide as the psadbw result, just
-  // bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
-  // anyway.
+  // bitcast. If it's narrower which can only occur for v2i32, bits 127:16 of
+  // the PSADBW will be zero. If we promote/ narrow vectors, truncate the v2i64
+  // result to v2i32 which will be removed by type legalization. If we/ widen
+  // narrow vectors then we bitcast to v4i32 and extract v2i32.
   MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
-  if (VT.getSizeInBits() >= ResVT.getSizeInBits())
+  if (ExperimentalVectorWideningLegalization ||
+      VT.getSizeInBits() >= ResVT.getSizeInBits())
     Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
   else
     Sad = DAG.getNode(ISD::TRUNCATE, DL, VT, Sad);
@@ -43660,6 +43663,10 @@ static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
     SDValue Zero = DAG.getConstant(0, DL, VT);
     Sad = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Zero, Sad,
                       DAG.getIntPtrConstant(0, DL));
+  } else if (ExperimentalVectorWideningLegalization &&
+             VT.getSizeInBits() < ResVT.getSizeInBits()) {
+    Sad = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Sad,
+                      DAG.getIntPtrConstant(0, DL));
   }
 
   // Preserve the reduction flag on the ADD. We may need to revisit for the
```

