diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-02-17 22:24:32 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2015-02-17 22:24:32 +0000 |
| commit | 1d89a02abbaa48745066a3c3eabe2fbaa2194def (patch) | |
| tree | 91849735bb8f6c6c44d5b33cc1ab5bb52a088124 /llvm/lib | |
| parent | c12cc5eba80e37a088b24b4085df70f1321103fc (diff) | |
| download | bcm5719-llvm-1d89a02abbaa48745066a3c3eabe2fbaa2194def.tar.gz bcm5719-llvm-1d89a02abbaa48745066a3c3eabe2fbaa2194def.zip | |
[X86][SSE] Generalised unpckl/unpckh shuffle matching
Added commuted unpckl/unpckh shuffle matching patterns as many cases containing undefined lanes fail to commute by themselves.
Differential Revision: http://reviews.llvm.org/D7564
llvm-svn: 229571
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 24 |
1 file changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ae0aae4365b..cd343192038 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8910,6 +8910,10 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 5, 1)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f32, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 6, 2, 7, 3)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f32, V2, V1); // Otherwise fall back to a SHUFPS lowering strategy. return lowerVectorShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); @@ -8995,6 +8999,10 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 2, 6, 3, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 5, 1)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 6, 2, 7, 3)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V2, V1); // Try to use byte rotation instructions. // Its more profitable for pre-SSSE3 to use shuffles/unpacks. 
@@ -10720,6 +10728,10 @@ static SDValue lowerV4F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 6, 2)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4f64, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 5, 1, 7, 3)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1); // If we have a single input to the zero element, insert that into V1 if we // can do so cheaply. @@ -10838,6 +10850,10 @@ static SDValue lowerV4I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 1, 5, 3, 7)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 4, 0, 6, 2)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i64, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 5, 1, 7, 3)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i64, V2, V1); // Try to simplify this by merging 128-bit lanes to enable a lane-based // shuffle. However, if we have AVX2 and either inputs are already in place, @@ -10899,6 +10915,10 @@ static SDValue lowerV8F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 8, 0, 9, 1, 12, 4, 13, 5)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8f32, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 10, 2, 11, 3, 14, 6, 15, 7)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8f32, V2, V1); // Otherwise, fall back to a SHUFPS sequence. Here it is important that we // have already handled any direct blends. 
We also need to squash the @@ -10995,6 +11015,10 @@ static SDValue lowerV8I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V1, V2); if (isShuffleEquivalent(V1, V2, Mask, 2, 10, 3, 11, 6, 14, 7, 15)) return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V1, V2); + if (isShuffleEquivalent(V1, V2, Mask, 8, 0, 9, 1, 12, 4, 13, 5)) + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i32, V2, V1); + if (isShuffleEquivalent(V1, V2, Mask, 10, 2, 11, 3, 14, 6, 15, 7)) + return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v8i32, V2, V1); } // Try to use bit shift instructions. |

