summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-09 10:56:29 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2016-08-09 10:56:29 +0000
commit aae7d4a1b6726a91fcd6441f3bbd9ca49461399f (patch)
tree ae418b007d9b1ac024d55d52db83b189b89b6f91 /llvm/lib/Target/X86/X86ISelLowering.cpp
parent 3a25d84a510c7aec668138960394c3d300259cae (diff)
download bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.tar.gz
download bcm5719-llvm-aae7d4a1b6726a91fcd6441f3bbd9ca49461399f.zip
[X86][XOP] Add support for combining target shuffles to VPPERM
llvm-svn: 278114
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 39
1 files changed, 39 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 8a7dc3555b4..77fa9ff26f2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3844,6 +3844,7 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) {
default: return false;
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
+ case X86ISD::VPPERM:
return true;
}
}
@@ -25325,6 +25326,44 @@ static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
return true;
}
+ // With XOP, if we have a 128-bit binary input shuffle we can always combine
+ // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
+ // slower than PSHUFB on targets that support both.
+ if ((Depth >= 3 || HasVariableMask) && RootVT.is128BitVector() &&
+ Subtarget.hasXOP()) {
+ // VPPERM Mask Operation
+ // Bits[4:0] - Byte Index (0 - 31)
+ // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
+ SmallVector<SDValue, 16> VPPERMMask;
+ int NumBytes = 16;
+ int Ratio = NumBytes / NumMaskElts;
+ for (int i = 0; i < NumBytes; ++i) {
+ int M = Mask[i / Ratio];
+ if (M == SM_SentinelUndef) {
+ VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
+ continue;
+ }
+ if (M == SM_SentinelZero) {
+ VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
+ continue;
+ }
+ M = Ratio * M + i % Ratio;
+ VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
+ }
+ MVT ByteVT = MVT::v16i8;
+ V1 = DAG.getBitcast(ByteVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(ByteVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
+ DCI.AddToWorklist(VPPERMMaskOp.getNode());
+ Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+
// Failed to find any combines.
return false;
}
OpenPOWER on IntegriCloud