diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-18 17:23:09 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-18 17:23:09 +0000 |
| commit | 4171a91e92888bc8aa849cb07dc384867cea5e3d (patch) | |
| tree | 9ba62b512ae3b5bb8b799b326fa87daa9ab9e255 /llvm/lib | |
| parent | e955f8bac48ed7a5e287b4092f175e65f7c80aba (diff) | |
| download | bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.tar.gz bcm5719-llvm-4171a91e92888bc8aa849cb07dc384867cea5e3d.zip | |
[X86] combineVectorTruncationWithPACKUS - remove split/concatenation of mask
combineVectorTruncationWithPACKUS is currently splitting the upper-bit masking into 128-bit subregs and then concatenating them back together.
This was originally done to avoid regressions that caused existing subregs to be concatenated to the larger type just for the AND masking before being extracted again. This was fixed by @spatel (notably rL303997 and rL347356).
This also lets SimplifyDemandedBits do some further improvements before it hits the recursive depth limit.
My only annoyance with this is that we were broadcasting some xmm masks but we seem to have lost them by moving to ymm - but that's a known issue as the logic in lowerBuildVectorAsBroadcast isn't great.
Differential Revision: https://reviews.llvm.org/D60375#inline-539623
llvm-svn: 358692
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 29 |
1 files changed, 6 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e0a43a7ab25..74e0ae07046 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39268,36 +39268,19 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, } /// Truncate using ISD::AND mask and X86ISD::PACKUS. +/// e.g. trunc <8 x i32> X to <8 x i16> --> +/// MaskX = X & 0xffff (clear high bits to prevent saturation) +/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1) static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL, const X86Subtarget &Subtarget, SelectionDAG &DAG) { SDValue In = N->getOperand(0); EVT InVT = In.getValueType(); - EVT InSVT = InVT.getVectorElementType(); EVT OutVT = N->getValueType(0); - EVT OutSVT = OutVT.getVectorElementType(); - - // Split a long vector into vectors of legal type and mask to unset all bits - // that won't appear in the result to prevent saturation. - // TODO - we should be doing this at the maximum legal size but this is - // causing regressions where we're concatenating back to max width just to - // perform the AND and then extracting back again..... 
- unsigned NumSubRegs = InVT.getSizeInBits() / 128; - unsigned NumSubRegElts = 128 / InSVT.getSizeInBits(); - EVT SubRegVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubRegElts); - SmallVector<SDValue, 8> SubVecs(NumSubRegs); - - APInt Mask = - APInt::getLowBitsSet(InSVT.getSizeInBits(), OutSVT.getSizeInBits()); - SDValue MaskVal = DAG.getConstant(Mask, DL, SubRegVT); - - for (unsigned i = 0; i < NumSubRegs; i++) { - SDValue Sub = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubRegVT, In, - DAG.getIntPtrConstant(i * NumSubRegElts, DL)); - SubVecs[i] = DAG.getNode(ISD::AND, DL, SubRegVT, Sub, MaskVal); - } - In = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, SubVecs); + APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), + OutVT.getScalarSizeInBits()); + In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); } |

