diff options
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 50 |
1 files changed, 42 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3eb01bfaff2..4331e3a3335 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7257,6 +7257,10 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask, bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); + int VectorSizeInBits = V1.getValueType().getSizeInBits(); + int ScalarSizeInBits = VectorSizeInBits / Mask.size(); + assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); + for (int i = 0, Size = Mask.size(); i < Size; ++i) { int M = Mask[i]; // Handle the easy cases. @@ -7265,17 +7269,47 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask, continue; } - // If this is an index into a build_vector node (which has the same number - // of elements), dig out the input value and use it. + // Determine shuffle input and normalize the mask. SDValue V = M < Size ? V1 : V2; - if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands()) + M %= Size; + + // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. + if (V.getOpcode() != ISD::BUILD_VECTOR) continue; - SDValue Input = V.getOperand(M % Size); - // The UNDEF opcode check really should be dead code here, but not quite - // worth asserting on (it isn't invalid, just unexpected). - if (Input.isUndef() || X86::isZeroNode(Input)) - Zeroable[i] = true; + // If the BUILD_VECTOR has fewer elements then the bitcasted portion of + // the (larger) source element must be UNDEF/ZERO. + if ((Size % V.getNumOperands()) == 0) { + int Scale = Size / V->getNumOperands(); + SDValue Op = V.getOperand(M / Scale); + if (Op.isUndef() || X86::isZeroNode(Op)) + Zeroable[i] = true; + else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { + APInt Val = Cst->getAPIntValue(); + Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val = Val.getLoBits(ScalarSizeInBits); + Zeroable[i] = (Val == 0); + } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { + APInt Val = Cst->getValueAPF().bitcastToAPInt(); + Val = Val.lshr((M % Scale) * ScalarSizeInBits); + Val = Val.getLoBits(ScalarSizeInBits); + Zeroable[i] = (Val == 0); + } + continue; + } + + // If the BUILD_VECTOR has more elements then all the (smaller) source + // elements must be UNDEF or ZERO. + if ((V.getNumOperands() % Size) == 0) { + int Scale = V->getNumOperands() / Size; + bool AllZeroable = true; + for (int j = 0; j < Scale; ++j) { + SDValue Op = V.getOperand((M * Scale) + j); + AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op)); + } + Zeroable[i] = AllZeroable; + continue; + } } return Zeroable; |