summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-03-20 15:45:42 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-03-20 15:45:42 +0000
commitc44472a5bc532a72b33d7bf242f75bf6aad78624 (patch)
treec43811616e11e5d5a23d3eadc01b91c3d771ea94 /llvm/lib/Target
parent74e4d55dccc556d7a5c8125901d14867ae1480e1 (diff)
downloadbcm5719-llvm-c44472a5bc532a72b33d7bf242f75bf6aad78624.tar.gz
bcm5719-llvm-c44472a5bc532a72b33d7bf242f75bf6aad78624.zip
[X86][SSE] Detect zeroable shuffle elements from different value types
Improve computeZeroableShuffleElements so that it can peek through bitcasts and extract zero/undef values from BUILD_VECTOR nodes whose element size differs from that of the shuffle mask. Differential Revision: http://reviews.llvm.org/D14261 llvm-svn: 263906
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp50
1 files changed, 42 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3eb01bfaff2..4331e3a3335 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7257,6 +7257,10 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
+ int VectorSizeInBits = V1.getValueType().getSizeInBits();
+ int ScalarSizeInBits = VectorSizeInBits / Mask.size();
+ assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
+
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
int M = Mask[i];
// Handle the easy cases.
@@ -7265,17 +7269,47 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
continue;
}
- // If this is an index into a build_vector node (which has the same number
- // of elements), dig out the input value and use it.
+ // Determine shuffle input and normalize the mask.
SDValue V = M < Size ? V1 : V2;
- if (V.getOpcode() != ISD::BUILD_VECTOR || Size != (int)V.getNumOperands())
+ M %= Size;
+
+ // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
continue;
- SDValue Input = V.getOperand(M % Size);
- // The UNDEF opcode check really should be dead code here, but not quite
- // worth asserting on (it isn't invalid, just unexpected).
- if (Input.isUndef() || X86::isZeroNode(Input))
- Zeroable[i] = true;
+ // If the BUILD_VECTOR has fewer elements, then the bitcasted portion of
+ // the (larger) source element must be UNDEF/ZERO.
+ if ((Size % V.getNumOperands()) == 0) {
+ int Scale = Size / V->getNumOperands();
+ SDValue Op = V.getOperand(M / Scale);
+ if (Op.isUndef() || X86::isZeroNode(Op))
+ Zeroable[i] = true;
+ else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
+ APInt Val = Cst->getAPIntValue();
+ Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val = Val.getLoBits(ScalarSizeInBits);
+ Zeroable[i] = (Val == 0);
+ } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
+ APInt Val = Cst->getValueAPF().bitcastToAPInt();
+ Val = Val.lshr((M % Scale) * ScalarSizeInBits);
+ Val = Val.getLoBits(ScalarSizeInBits);
+ Zeroable[i] = (Val == 0);
+ }
+ continue;
+ }
+
+ // If the BUILD_VECTOR has more elements, then all the (smaller) source
+ // elements must be UNDEF or ZERO.
+ if ((V.getNumOperands() % Size) == 0) {
+ int Scale = V->getNumOperands() / Size;
+ bool AllZeroable = true;
+ for (int j = 0; j < Scale; ++j) {
+ SDValue Op = V.getOperand((M * Scale) + j);
+ AllZeroable &= (Op.isUndef() || X86::isZeroNode(Op));
+ }
+ Zeroable[i] = AllZeroable;
+ continue;
+ }
}
return Zeroable;
OpenPOWER on IntegriCloud