diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 40 |
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll | 40 |
2 files changed, 62 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index eedffaf7166..96d38c4f97a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28588,13 +28588,14 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operand in case early exit happens. if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, - ShuffleVT) && + NewV1, DL, DAG, Subtarget, Shuffle, + ShuffleSrcVT, ShuffleVT) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! - Res = DAG.getBitcast(ShuffleSrcVT, V1); + Res = DAG.getBitcast(ShuffleSrcVT, NewV1); DCI.AddToWorklist(Res.getNode()); Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); DCI.AddToWorklist(Res.getNode()); @@ -28616,33 +28617,36 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, } } + SDValue NewV1 = V1; // Save operands in case early exit happens. + SDValue NewV2 = V2; if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, - V1, V2, DL, DAG, Subtarget, Shuffle, + NewV1, NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, ShuffleVT, UnaryShuffle) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
- V1 = DAG.getBitcast(ShuffleSrcVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleSrcVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2); + NewV1 = DAG.getBitcast(ShuffleSrcVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleSrcVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); } - if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, - AllowIntDomain, V1, V2, DL, DAG, - Subtarget, Shuffle, ShuffleVT, - PermuteImm) && + NewV1 = V1; // Save operands in case early exit happens. + NewV2 = V2; + if (matchBinaryPermuteVectorShuffle( + MaskVT, Mask, Zeroable, AllowFloatDomain, AllowIntDomain, NewV1, + NewV2, DL, DAG, Subtarget, Shuffle, ShuffleVT, PermuteImm) && (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { if (Depth == 1 && Root.getOpcode() == Shuffle) return SDValue(); // Nothing to do! 
- V1 = DAG.getBitcast(ShuffleVT, V1); - DCI.AddToWorklist(V1.getNode()); - V2 = DAG.getBitcast(ShuffleVT, V2); - DCI.AddToWorklist(V2.getNode()); - Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2, + NewV1 = DAG.getBitcast(ShuffleVT, NewV1); + DCI.AddToWorklist(NewV1.getNode()); + NewV2 = DAG.getBitcast(ShuffleVT, NewV2); + DCI.AddToWorklist(NewV2.getNode()); + Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, DAG.getConstant(PermuteImm, DL, MVT::i8)); DCI.AddToWorklist(Res.getNode()); return DAG.getBitcast(RootVT, Res); diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll index 6abe609e26f..84ecf47fee7 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/partial_permute.ll @@ -4788,3 +4788,43 @@ define <2 x double> @test_masked_z_8xdouble_to_2xdouble_perm_mem_mask1(<8 x doub ret <2 x double> %res } +; PR35977 +define void @test_zext_v8i8_to_v8i16(<8 x i8>* %arg, <8 x i16>* %arg1) { +; CHECK-LABEL: test_zext_v8i8_to_v8i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; CHECK-NEXT: vmovdqa %xmm0, (%rsi) +; CHECK-NEXT: retq + %tmp = getelementptr <8 x i8>, <8 x i8>* %arg, i32 0 + %tmp2 = load <8 x i8>, <8 x i8>* %tmp + %tmp3 = extractelement <8 x i8> %tmp2, i32 0 + %tmp4 = zext i8 %tmp3 to i16 + %tmp5 = insertelement <8 x i16> undef, i16 %tmp4, i32 0 + %tmp6 = extractelement <8 x i8> %tmp2, i32 1 + %tmp7 = zext i8 %tmp6 to i16 + %tmp8 = insertelement <8 x i16> %tmp5, i16 %tmp7, i32 1 + %tmp9 = extractelement <8 x i8> %tmp2, i32 2 + %tmp10 = zext i8 %tmp9 to i16 + %tmp11 = insertelement <8 x i16> %tmp8, i16 %tmp10, i32 2 + %tmp12 = extractelement <8 x i8> %tmp2, i32 3 + %tmp13 = zext i8 %tmp12 to 
i16 + %tmp14 = insertelement <8 x i16> %tmp11, i16 %tmp13, i32 3 + %tmp15 = extractelement <8 x i8> %tmp2, i32 4 + %tmp16 = zext i8 %tmp15 to i16 + %tmp17 = insertelement <8 x i16> %tmp14, i16 %tmp16, i32 4 + %tmp18 = extractelement <8 x i8> %tmp2, i32 5 + %tmp19 = zext i8 %tmp18 to i16 + %tmp20 = insertelement <8 x i16> %tmp17, i16 %tmp19, i32 5 + %tmp21 = extractelement <8 x i8> %tmp2, i32 6 + %tmp22 = zext i8 %tmp21 to i16 + %tmp23 = insertelement <8 x i16> %tmp20, i16 %tmp22, i32 6 + %tmp24 = extractelement <8 x i8> %tmp2, i32 7 + %tmp25 = zext i8 %tmp24 to i16 + %tmp26 = insertelement <8 x i16> %tmp23, i16 %tmp25, i32 7 + %tmp27 = shl <8 x i16> %tmp26, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> + %tmp28 = getelementptr <8 x i16>, <8 x i16>* %arg1, i32 0 + store <8 x i16> %tmp27, <8 x i16>* %tmp28 + ret void +} |

