summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
authorSimon Pilgrim <llvm-dev@redking.me.uk>2018-03-11 11:52:26 +0000
committerSimon Pilgrim <llvm-dev@redking.me.uk>2018-03-11 11:52:26 +0000
commitf9cc80d218fbd0be2a86fc1c5f03f894a99ddf18 (patch)
tree7e057d5c815d501b10b8d02561769e5d6cf4958b /llvm/lib/Target/X86/X86ISelLowering.cpp
parent2565bd421e20e34373041ac4a281947e70748c49 (diff)
downloadbcm5719-llvm-f9cc80d218fbd0be2a86fc1c5f03f894a99ddf18.tar.gz
bcm5719-llvm-f9cc80d218fbd0be2a86fc1c5f03f894a99ddf18.zip
[X86][AVX] createVariablePermute - use 2xVPERMIL+PCMPGT+SELECT for v8i32/v8f32 and v4i64/v4f64 variable permutes
As VPERMILPS/VPERMILPD only select elements based on bits[1:0]/bit[1] of each index, we can permute both the (repeated) lo/hi 128-bit vectors in each case and then select between these results based on whether the index was for lo/hi. For v4i64/v4f64 this avoids some rather nasty v4i64 multiplies on the AVX2 implementation, which seem to be worse than the extra port5 pressure from the additional shuffles/blends. llvm-svn: 327239
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp37
1 files changed, 26 insertions, 11 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 55c28c3b108..89ad2bbed3c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8035,15 +8035,24 @@ SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
case MVT::v8i32:
if (Subtarget.hasAVX2())
Opcode = X86ISD::VPERMV;
- else if (Subtarget.hasXOP()) {
+ else if (Subtarget.hasAVX()) {
SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec);
SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{0, 1, 2, 3, 0, 1, 2, 3});
SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
{4, 5, 6, 7, 4, 5, 6, 7});
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
- LoLo, HiHi, IndicesVec,
- DAG.getConstant(0, DL, MVT::i8)));
+ if (Subtarget.hasXOP())
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32,
+ LoLo, HiHi, IndicesVec,
+ DAG.getConstant(0, DL, MVT::i8)));
+ // Permute Lo and Hi and then select based on index range.
+ // This works as VPERMILPS only uses index bits[0:1] to permute elements.
+ SDValue Res = DAG.getSelectCC(
+ DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32),
+ DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec),
+ DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec),
+ ISD::CondCode::SETGT);
+ return DAG.getBitcast(VT, Res);
}
break;
case MVT::v4i64:
@@ -8060,7 +8069,7 @@ SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
return extract256BitVector(Res, 0, DAG, DL);
}
Opcode = X86ISD::VPERMV;
- } else if (Subtarget.hasXOP()) {
+ } else if (Subtarget.hasAVX()) {
SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec);
SDValue LoLo =
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1});
@@ -8068,12 +8077,18 @@ SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3});
// VPERMIL2PD selects with bit#1 of the index vector, so scale IndicesVec.
IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
- return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
- LoLo, HiHi, IndicesVec,
- DAG.getConstant(0, DL, MVT::i8)));
- } else if (Subtarget.hasAVX2()) {
- Opcode = X86ISD::VPERMV;
- ShuffleVT = MVT::v8f32;
+ if (Subtarget.hasXOP())
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64,
+ LoLo, HiHi, IndicesVec,
+ DAG.getConstant(0, DL, MVT::i8)));
+ // Permute Lo and Hi and then select based on index range.
+ // This works as VPERMILPD only uses index bit[1] to permute elements.
+ SDValue Res = DAG.getSelectCC(
+ DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64),
+ DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec),
+ DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec),
+ ISD::CondCode::SETGT);
+ return DAG.getBitcast(VT, Res);
}
break;
case MVT::v64i8:
OpenPOWER on IntegriCloud