diff options
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 14 |
-rw-r--r-- | llvm/test/CodeGen/X86/sse41.ll | 18 |
2 files changed, 32 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 81889d0a745..06a61cbba64 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -9063,6 +9063,18 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, To = V2; DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) - Mask.begin(); + + // If we have 1 element from each vector, we have to check if we're + // changing V1's element's place. If so, we're done. Otherwise, we + // should assume we're changing V2's element's place and behave + // accordingly. + int FromV2 = std::count_if(Mask.begin(), Mask.end(), FromV2Predicate); + if (FromV1 == FromV2 && DestIndex == Mask[DestIndex] % 4) { + From = V2; + To = V1; + DestIndex = + std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin(); + } } else { assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 && "More than one element from V1 and from V2, or no elements from one " @@ -9074,6 +9086,8 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin(); } + // Get an index into the source vector in the range [0,4) (the mask is + // in the range [0,8) because it can address V1 and V2) unsigned SrcIndex = Mask[DestIndex] % 4; if (MayFoldLoad(From)) { // Trivial case, when From comes from a load and is only used by the diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 6726a3ea9ff..986488f531e 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -714,3 +714,21 @@ define <4 x float> @pr20087(<4 x float> %a, <4 x float> *%ptr) { %ret = shufflevector <4 x float> %load, <4 x float> %a, <4 x i32> <i32 4, i32 undef, i32 6, i32 2> ret <4 x float> %ret } + +; Edge case for insertps where we end up with a shuffle with mask=<0, 7, -1, -1> +define void @insertps_pr20411(i32* noalias nocapture 
%RET) #1 { +; CHECK-LABEL: insertps_pr20411: +; CHECK: movaps {{[^,]*}}, %[[REG1:xmm.]] +; CHECK: pshufd {{.*}} ## [[REG2:xmm.]] = mem[3,0,0,0] +; CHECK: insertps {{.*}} ## xmm1 = [[REG2]][0],[[REG1]][3]{{.*}} + + %gather_load = shufflevector <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %shuffle109 = shufflevector <4 x i32> <i32 4, i32 5, i32 6, i32 7>, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; 4 5 6 7 + + %shuffle116 = shufflevector <8 x i32> %gather_load, <8 x i32> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef> ; 3 x x x + %shuffle117 = shufflevector <4 x i32> %shuffle109, <4 x i32> %shuffle116, <4 x i32> <i32 4, i32 3, i32 undef, i32 undef> ; 3 7 x x + + %ptrcast = bitcast i32* %RET to <4 x i32>* + store <4 x i32> %shuffle117, <4 x i32>* %ptrcast, align 4 + ret void +} |