summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/X86/X86ISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/X86/X86ISelLowering.cpp')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp24
1 files changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ae0aae4365b..cd343192038 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -8910,6 +8910,10 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 5, 1))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 6, 2, 7, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1);
// Otherwise fall back to a SHUFPS lowering strategy.
return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
@@ -8995,6 +8999,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 5, 1))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 6, 2, 7, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1);
// Try to use byte rotation instructions.
// Its more profitable for pre-SSSE3 to use shuffles/unpacks.
@@ -10720,6 +10728,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 6, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 5, 1, 7, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
// If we have a single input to the zero element, insert that into V1 if we
// can do so cheaply.
@@ -10838,6 +10850,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 6, 2))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 5, 1, 7, 3))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1);
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
@@ -10899,6 +10915,10 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 8, 0, 9, 1, 12, 4, 13, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 10, 2, 11, 3, 14, 6, 15, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1);
// Otherwise, fall back to a SHUFPS sequence. Here it is important that we
// have already handled any direct blends. We also need to squash the
@@ -10995,6 +11015,10 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2);
if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15))
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, 8, 0, 9, 1, 12, 4, 13, 5))
+ return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, 10, 2, 11, 3, 14, 6, 15, 7))
+ return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1);
}
// Try to use bit shift instructions.
OpenPOWER on IntegriCloud