summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2016-04-24 14:53:54 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2016-04-24 14:53:54 +0000
commit9f5697ef6803acededad77a51b40a3fbec905c81 (patch)
treedd422f7e0b7a2a3764dbedb7e25bb0a6e24cf054 /llvm/lib
parenta425bbbfb871e44d515a59543eb5b226a689d20c (diff)
downloadbcm5719-llvm-9f5697ef6803acededad77a51b40a3fbec905c81.tar.gz
bcm5719-llvm-9f5697ef6803acededad77a51b40a3fbec905c81.zip
[X86][SSE] Improved support for decoding target shuffle masks through bitcasts
Reused the ability to split constants of a type wider than the shuffle mask so that it also works with masks generated from scalar constants transferred to xmm. This fixes an issue that prevented PSHUFB target shuffle mask decoding from handling rematerialized scalar constants, and it also exposes the XOP VPPERM bug described in PR27472. llvm-svn: 267343
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 46
1 files changed, 26 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2ec27bde222..966af04275c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4678,7 +4678,22 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+ // Split an APInt element into MaskEltSizeInBits sized pieces and
+ // insert into the shuffle mask.
+ auto SplitElementToMask = [&](APInt Element) {
+ // Note that this is x86 and so always little endian: the low byte is
+ // the first byte of the mask.
+ int Split = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+ for (int i = 0; i < Split; ++i) {
+ APInt RawElt = Element.getLoBits(MaskEltSizeInBits);
+ Element = Element.lshr(MaskEltSizeInBits);
+ RawMask.push_back(RawElt.getZExtValue());
+ }
+ };
+
if (MaskNode.getOpcode() == X86ISD::VBROADCAST) {
+ // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
+ // TODO: Handle (VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0
if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
return false;
if (auto *CN = dyn_cast<ConstantSDNode>(MaskNode.getOperand(0))) {
@@ -4693,13 +4708,16 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
- if (VT.getScalarSizeInBits() != MaskEltSizeInBits)
+
+ // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
+ if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
return false;
- SDValue MaskElement = MaskNode.getOperand(0).getOperand(0);
- if (auto *CN = dyn_cast<ConstantSDNode>(MaskElement)) {
- APInt RawElt = CN->getAPIntValue().getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- RawMask.append(VT.getVectorNumElements() - 1, 0);
+ unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+
+ SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
+ if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
+ SplitElementToMask(CN->getAPIntValue());
+ RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
return true;
}
return false;
@@ -4711,7 +4729,6 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
// TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
return false;
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
for (int i = 0, e = MaskNode.getNumOperands(); i < e; ++i) {
SDValue Op = MaskNode.getOperand(i);
@@ -4720,23 +4737,12 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
continue;
}
- APInt MaskElement;
if (auto *CN = dyn_cast<ConstantSDNode>(Op.getNode()))
- MaskElement = CN->getAPIntValue();
+ SplitElementToMask(CN->getAPIntValue());
else if (auto *CFN = dyn_cast<ConstantFPSDNode>(Op.getNode()))
- MaskElement = CFN->getValueAPF().bitcastToAPInt();
+ SplitElementToMask(CFN->getValueAPF().bitcastToAPInt());
else
return false;
-
- // We now have to decode the element which could be any integer size and
- // extract each byte of it.
- for (unsigned j = 0; j < ElementSplit; ++j) {
- // Note that this is x86 and so always little endian: the low byte is
- // the first byte of the mask.
- APInt RawElt = MaskElement.getLoBits(MaskEltSizeInBits);
- RawMask.push_back(RawElt.getZExtValue());
- MaskElement = MaskElement.lshr(MaskEltSizeInBits);
- }
}
return true;
OpenPOWER on IntegriCloud