diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-10-31 11:21:39 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-10-31 11:21:39 +0000 |
| commit | a780b94cd1b03c3642a2662ea59dd6cfa7d6b81f (patch) | |
| tree | 0de02a802e198bad66a78d2a26eec380cb4e61b5 /llvm/lib/Target/X86 | |
| parent | 1eb04d289a6fb4c7cb75f69312a7b949987e7c97 (diff) | |
| download | bcm5719-llvm-a780b94cd1b03c3642a2662ea59dd6cfa7d6b81f.tar.gz bcm5719-llvm-a780b94cd1b03c3642a2662ea59dd6cfa7d6b81f.zip | |
[X86][SSE] Convert computeZeroableShuffleElements to emit KnownUndef and KnownZero
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 58 |
1 files changed, 35 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ee80d813644..13d62dc7cfb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -6732,7 +6732,7 @@ static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask, V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); - assert((VT.getSizeInBits() % Mask.size()) == 0 && + assert((VT.getSizeInBits() % Size) == 0 && "Illegal split of shuffle value type"); unsigned EltSizeInBits = VT.getSizeInBits() / Size; @@ -10423,9 +10423,12 @@ static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL, /// zero. Many x86 shuffles can zero lanes cheaply and we often want to handle /// as many lanes with this technique as possible to simplify the remaining /// shuffle. -static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, - SDValue V1, SDValue V2) { - APInt Zeroable(Mask.size(), 0); +static void computeZeroableShuffleElements(ArrayRef<int> Mask, + SDValue V1, SDValue V2, + APInt &KnownUndef, APInt &KnownZero) { + int Size = Mask.size(); + KnownUndef = KnownZero = APInt::getNullValue(Size); + V1 = peekThroughBitcasts(V1); V2 = peekThroughBitcasts(V2); @@ -10433,14 +10436,18 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); int VectorSizeInBits = V1.getValueSizeInBits(); - int ScalarSizeInBits = VectorSizeInBits / Mask.size(); + int ScalarSizeInBits = VectorSizeInBits / Size; assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); - for (int i = 0, Size = Mask.size(); i < Size; ++i) { + for (int i = 0; i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. - if (M < 0 || (M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { - Zeroable.setBit(i); + if (M < 0) { + KnownUndef.setBit(i); + continue; + } + if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { + KnownZero.setBit(i); continue; } @@ -10457,20 +10464,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, if ((Size % V.getNumOperands()) == 0) { int Scale = Size / V->getNumOperands(); SDValue Op = V.getOperand(M / Scale); - if (Op.isUndef() || X86::isZeroNode(Op)) - Zeroable.setBit(i); + if (Op.isUndef()) + KnownUndef.setBit(i); + if (X86::isZeroNode(Op)) + KnownZero.setBit(i); else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { APInt Val = Cst->getAPIntValue(); - Val.lshrInPlace((M % Scale) * ScalarSizeInBits); - Val = Val.getLoBits(ScalarSizeInBits); + Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); if (Val == 0) - Zeroable.setBit(i); + KnownZero.setBit(i); } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { APInt Val = Cst->getValueAPF().bitcastToAPInt(); - Val.lshrInPlace((M % Scale) * ScalarSizeInBits); - Val = Val.getLoBits(ScalarSizeInBits); + Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); if (Val == 0) - Zeroable.setBit(i); + KnownZero.setBit(i); } continue; } @@ -10479,18 +10486,20 @@ static APInt computeZeroableShuffleElements(ArrayRef<int> Mask, // elements must be UNDEF or ZERO. if ((V.getNumOperands() % Size) == 0) { int Scale = V->getNumOperands() / Size; - bool AllZeroable = true; + bool AllUndef = true; + bool AllZero = true; for (int j = 0; j < Scale; ++j) { SDValue Op = V.getOperand((M * Scale) + j); - AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op)); + AllUndef &= Op.isUndef(); + AllZero &= X86::isZeroNode(Op); } - if (AllZeroable) - Zeroable.setBit(i); + if (AllUndef) + KnownUndef.setBit(i); + if (AllZero) + KnownZero.setBit(i); continue; } } - - return Zeroable; } // The Shuffle result is as follow: @@ -17077,7 +17086,10 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget, // We actually see shuffles that are entirely re-arrangements of a set of // zero inputs. This mostly happens while decomposing complex shuffles into // simple ones. Directly lower these as a buildvector of zeros. - APInt Zeroable = computeZeroableShuffleElements(OrigMask, V1, V2); + APInt KnownUndef, KnownZero; + computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero); + + APInt Zeroable = KnownUndef | KnownZero; if (Zeroable.isAllOnesValue()) return getZeroVector(VT, Subtarget, DAG, DL); |

