[X86] combineVectorTruncationWithPACKUS - remove split/concatenation of mask

combineVectorTruncationWithPACKUS is currently splitting the upper bit bit masking into 128-bit subregs and then concatenating them back together. This was originally done to avoid regressions that caused existing subregs to be concatenated to the larger type just for the AND masking before being extracted again. This was fixed by @spatel (notably rL303997 and rL347356). This also lets SimplifyDemandedBits do some further improvements before it hits the recursive depth limit. My only annoyance with this is that we were broadcasting some xmm masks but we seem to have lost them by moving to ymm - but that's a known issue as the logic in lowerBuildVectorAsBroadcast isn't great. Differential Revision: https://reviews.llvm.org/D60375#inline-539623 llvm-svn: 358692
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-18 17:23:09 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-04-18 17:23:09 +0000
commit: 4171a91e92888bc8aa849cb07dc384867cea5e3d (patch)
tree: 9ba62b512ae3b5bb8b799b326fa87daa9ab9e255 /llvm/lib
parent: e955f8bac48ed7a5e287b4092f175e65f7c80aba (diff)
download: bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.tar.gz
bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.zip
1 files changed, 6 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e0a43a7ab25..74e0ae07046 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39268,36 +39268,19 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
 }
 
 /// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// e.g. trunc <8 x i32> X to <8 x i16> -->
+/// MaskX = X & 0xffff (clear high bits to prevent saturation)
+/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
 static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
                                                  const X86Subtarget &Subtarget,
                                                  SelectionDAG &DAG) {
   SDValue In = N->getOperand(0);
   EVT InVT = In.getValueType();
-  EVT InSVT = InVT.getVectorElementType();
   EVT OutVT = N->getValueType(0);
-  EVT OutSVT = OutVT.getVectorElementType();
-
-  // Split a long vector into vectors of legal type and mask to unset all bits
-  // that won't appear in the result to prevent saturation.
-  // TODO - we should be doing this at the maximum legal size but this is
-  // causing regressions where we're concatenating back to max width just to
-  // perform the AND and then extracting back again.....
-  unsigned NumSubRegs = InVT.getSizeInBits() / 128;
-  unsigned NumSubRegElts = 128 / InSVT.getSizeInBits();
-  EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts);
-  SmallVector<SDValue, 8> SubVecs(NumSubRegs);
-
-  APInt Mask =
-      APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits());
-  SDValue MaskVal = DAG.getConstant(Mask, DL, SubRegVT);
-
-  for (unsigned i = 0; i < NumSubRegs; i++) {
-    SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In,
-                              DAG.getIntPtrConstant(i * NumSubRegElts, DL));
-    SubVecs[i] = DAG.getNode(ISD::AND, DL, SubRegVT, Sub, MaskVal);
-  }
-  In = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, SubVecs);
 
+  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
+                                    OutVT.getScalarSizeInBits());
+  In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
   return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
 }
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2019-04-18 17:23:09 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2019-04-18 17:23:09 +0000
commit	4171a91e92888bc8aa849cb07dc384867cea5e3d (patch)
tree	9ba62b512ae3b5bb8b799b326fa87daa9ab9e255 /llvm/lib
parent	e955f8bac48ed7a5e287b4092f175e65f7c80aba (diff)
download	bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.tar.gz bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.zip