diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 74 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | 6 |
2 files changed, 41 insertions, 39 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c1e0137a4fb..39cdd77f0be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2996,17 +2996,6 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { InVec, InIdx)); } -// Utility for visitShuffleVector - Return true if every element in Mask, -// beginning from position Pos and ending in Pos+Size, falls within the -// specified sequential range [L, L+Pos). or is undef. -static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, - unsigned Pos, unsigned Size, int Low) { - for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) - if (Mask[i] >= 0 && Mask[i] != Low) - return false; - return true; -} - void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -3030,29 +3019,46 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // Mask is longer than the source vectors and is a multiple of the source // vectors. We can use concatenate vector to make the mask and vectors // lengths match. - if (SrcNumElts*2 == MaskNumElts) { - // First check for Src1 in low and Src2 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src1, Src2)); - return; + + unsigned NumConcat = MaskNumElts / SrcNumElts; + + // Check if the shuffle is some kind of concatenation of the input vectors. + bool IsConcat = true; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx < 0) + continue; + // Ensure the indices in each SrcVT sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) { + IsConcat = false; + break; } - // Then check for Src2 in low and Src1 in high - if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && - isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { - // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), - VT, Src2, Src1)); - return; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. Just emit + // a CONCAT_VECTORS operation. + if (IsConcat) { + SmallVector<SDValue, 8> ConcatOps; + for (auto Src : ConcatSrcs) { + if (Src < 0) + ConcatOps.push_back(DAG.getUNDEF(SrcVT)); + else if (Src == 0) + ConcatOps.push_back(Src1); + else + ConcatOps.push_back(Src2); } + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), + VT, ConcatOps)); + return; } // Pad both vectors with undefs to make them the same length as the mask. - unsigned NumConcat = MaskNumElts / SrcNumElts; - bool Src1U = Src1.isUndef(); - bool Src2U = Src2.isUndef(); SDValue UndefVal = DAG.getUNDEF(SrcVT); SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); @@ -3060,10 +3066,12 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps1[0] = Src1; MOps2[0] = Src2; - Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps1); - Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurSDLoc(), VT, MOps2); + Src1 = Src1.isUndef() ? DAG.getUNDEF(VT) + : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps1); + Src2 = Src2.isUndef() ? DAG.getUNDEF(VT) + : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps2); // Readjust mask for new input vector length. SmallVector<int, 8> MappedOps; diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index 59eb4cdb195..c3fb355d508 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -318,12 +318,10 @@ define <4 x i64> @test_mm256_castpd_si256(<4 x double> %a0) nounwind { define <4 x double> @test_mm256_castpd128_pd256(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm256_castpd128_pd256: ; X32: # BB#0: -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_castpd128_pd256: ; X64: # BB#0: -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x double> %a0, <2 x double> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x double> %res @@ -370,12 +368,10 @@ define <4 x i64> @test_mm256_castps_si256(<8 x float> %a0) nounwind { define <8 x float> @test_mm256_castps128_ps256(<4 x float> %a0) nounwind { ; X32-LABEL: test_mm256_castps128_ps256: ; X32: # BB#0: -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_castps128_ps256: ; X64: # BB#0: -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <4 x float> %a0, <4 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef> ret <8 x float> %res @@ -398,12 +394,10 @@ define <4 x float> @test_mm256_castps256_ps128(<8 x float> %a0) nounwind { define <4 x i64> @test_mm256_castsi128_si256(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm256_castsi128_si256: ; X32: # BB#0: -; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_castsi128_si256: ; X64: # BB#0: -; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> %a0, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ret <4 x i64> %res |

