diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-20 15:45:42 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-03-20 15:45:42 +0000 |
commit | c44472a5bc532a72b33d7bf242f75bf6aad78624 (patch) | |
tree | c43811616e11e5d5a23d3eadc01b91c3d771ea94 /llvm/lib/Target | |
parent | 74e4d55dccc556d7a5c8125901d14867ae1480e1 (diff) | |
download | bcm5719-llvm-c44472a5bc532a72b33d7bf242f75bf6aad78624.tar.gz bcm5719-llvm-c44472a5bc532a72b33d7bf242f75bf6aad78624.zip |
[X86][SSE] Detect zeroable shuffle elements from different value types
Improve computeZeroableShuffleElements to be able to peek through bitcasts to extract zero/undef values from BUILD_VECTOR nodes of different element sizes to the shuffle mask.
Differential Revision: http://reviews.llvm.org/D14261
llvm-svn: 263906
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 50 |
1 files changed, 42 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3eb01bfaff2..4331e3a3335 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7257,6 +7257,10 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask, bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + int VectorSizeInBits = V1.getValueType().getSizeInBits(); + int ScalarSizeInBits = VectorSizeInBits / Mask.size(); + assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); + for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. @@ -7265,17 +7269,47 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask, continue; } - // If this is an index into a build_vector node (which has the same number - // of elements), dig out the input value and use it. + // Determine shuffle input and normalize the mask. SDValue V = M < Size ? V1 : V2; - if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + M %= Size; + + // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. + if (V.getOpcode() != ISD::BUILD_VECTOR) continue; - SDValue Input = V.getOperand(M % Size); - // The UNDEF opcode check really should be dead code here, but not quite - // worth asserting on (it isn't invalid, just unexpected). - if (Input.isUndef() || X86::isZeroNode(Input)) - Zeroable[i] = true; + // If the BUILD_VECTOR has fewer elements then the bitcasted portion of + // the (larger) source element must be UNDEF/ZERO. + if ((Size % V.getNumOperands()) == 0) { + int Scale = Size / V->getNumOperands(); + SDValue Op = V.getOperand(M / Scale); + if (Op.isUndef() || X86::isZeroNode(Op)) + Zeroable[i] = true; + else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { + APInt Val = Cst->getAPIntValue(); + Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val = Val.getLoBits(ScalarSizeInBits); + Zeroable[i] = (Val == 0); + } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { + APInt Val = Cst->getValueAPF().bitcastToAPInt(); + Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val = Val.getLoBits(ScalarSizeInBits); + Zeroable[i] = (Val == 0); + } + continue; + } + + // If the BUILD_VECTOR has more elements then all the (smaller) source + // elements must be UNDEF or ZERO. + if ((V.getNumOperands() % Size) == 0) { + int Scale = V->getNumOperands() / Size; + bool AllZeroable = true; + for (int j = 0; j < Scale; ++j) { + SDValue Op = V.getOperand((M * Scale) + j); + AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op)); + } + Zeroable[i] = AllZeroable; + continue; + } } return Zeroable; |