summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 24
1 file changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5dd85f4e28f..efcc7ed2b15 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -7142,6 +7142,12 @@ static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2f64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2f64, V1, V2);
+
unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2,
DAG.getConstant(SHUFPDMask, MVT::i8));
@@ -7178,6 +7184,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
}
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
+ if (isShuffleEquivalent(Mask, 1, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
+
// We implement this with SHUFPD which is pretty lame because it will likely
// incur 2 cycles of stall for integer vectors on Nehalem and older chips.
// However, all the alternatives are still more cycles and newer chips don't
@@ -7216,6 +7228,12 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+
if (NumV2Elements == 1) {
int V2Index =
std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
@@ -7304,6 +7322,12 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
getV4X86ShuffleImm8ForMask(Mask, DAG));
+ // Use dedicated unpack instructions for masks that match their pattern.
+ if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
OpenPOWER on IntegriCloud