diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-27 08:49:15 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2016-05-27 08:49:15 +0000 |
| commit | c013e5737bfe8de742adbc1c937415707c195dc2 (patch) | |
| tree | 070cb0154b207ee708ad5c1becd433f19b81c92c /llvm/test | |
| parent | 85624c5de3e831ffa01fdc2d159e3d69c30de08d (diff) | |
| download | bcm5719-llvm-c013e5737bfe8de742adbc1c937415707c195dc2.tar.gz bcm5719-llvm-c013e5737bfe8de742adbc1c937415707c195dc2.zip | |
[X86][SSE] Replace (V)PMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (llvm)
This patch removes the llvm (V)PMOVSX and (V)PMOVZX sign/zero extension intrinsics and auto-upgrades them to SEXT/ZEXT calls instead. We already did this for SSE41 PMOVSX some time ago, so much of that implementation can be reused.
A companion patch (D20684) removes/auto-upgrades the clang intrinsics.
Differential Revision: http://reviews.llvm.org/D20686
llvm-svn: 270973
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll | 66 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 106 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll | 66 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll | 96 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 197 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll | 63 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll | 66 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll | 96 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll | 37 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/stack-folding-int-avx2.ll | 45 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/x86-pmovsx.ll | 70 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/x86-pmovzx.ll | 137 |
13 files changed, 350 insertions, 731 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll index 862e9378afe..8d03784ce1b 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll @@ -247,6 +247,72 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbd: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; CHECK-NEXT: retl + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbq: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbw: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: retl + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxdq: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxwd: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; CHECK-NEXT: retl + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxwq: +; CHECK: ## BB#0: +; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone + + define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { ; AVX-LABEL: test_x86_sse2_cvtdq2pd: ; AVX: ## BB#0: diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index ac8be0fa2e2..84f8f3cd150 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL @@ -1800,102 +1800,6 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone -define <4 x i32> 
@test_x86_sse41_pmovzxbd(<16 x i8> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxbd: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxbd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxbq: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxbq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone - - -define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxbw: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxbw: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VL-NEXT: retl - %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> 
@llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxdq: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxdq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; AVX512VL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone - - -define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxwd: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxwd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512VL-NEXT: retl - %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { -; AVX-LABEL: test_x86_sse41_pmovzxwq: -; AVX: ## BB#0: -; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX-NEXT: retl -; -; AVX512VL-LABEL: test_x86_sse41_pmovzxwq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone - - define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { ; AVX-LABEL: test_x86_sse41_pmuldq: ; AVX: ## BB#0: @@ -4126,7 +4030,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 
x i8> %a1) { ; AVX512VL-LABEL: test_x86_avx_storeu_dq_256: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddb LCPI225_0, %ymm0, %ymm0 ; AVX512VL-NEXT: vmovdqu %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> @@ -4367,7 +4271,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) { ; ; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpermilpd LCPI239_0, %ymm0, %ymm0 ; AVX512VL-NEXT: retl %res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1] ret <4 x double> %res @@ -4859,7 +4763,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX-LABEL: movnt_dq: ; AVX: ## BB#0: ; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0 +; AVX-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0 ; AVX-NEXT: vmovntdq %ymm0, (%eax) ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl @@ -4867,7 +4771,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind { ; AVX512VL-LABEL: movnt_dq: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0 ; AVX512VL-NEXT: vmovntdq %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <2 x i64> %a1, <i64 1, i64 1> diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll index 684412e38e8..aad7e8b5fbf 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll @@ -740,11 +740,10 @@ define <4 x i64> @test_mm256_cvtepi8_epi16(<2 x i64> 
%a0) { ; X64-NEXT: vpmovsxbw %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %arg0) - %res = bitcast <16 x i16> %call to <4 x i64> + %ext = sext <16 x i8> %arg0 to <16 x i16> + %res = bitcast <16 x i16> %ext to <4 x i64> ret <4 x i64> %res } -declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepi8_epi32: @@ -757,11 +756,11 @@ define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) { ; X64-NEXT: vpmovsxbd %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %arg0) - %res = bitcast <8 x i32> %call to <4 x i64> + %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %ext = sext <8 x i8> %shuf to <8 x i32> + %res = bitcast <8 x i32> %ext to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepi8_epi64: @@ -774,10 +773,10 @@ define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovsxbq %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %arg0) - ret <4 x i64> %call + %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %ext = sext <4 x i8> %shuf to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepi16_epi32: @@ -790,11 +789,10 @@ define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) { ; X64-NEXT: vpmovsxwd %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %call = call <8 x i32> 
@llvm.x86.avx2.pmovsxwd(<8 x i16> %arg0) - %res = bitcast <8 x i32> %call to <4 x i64> + %ext = sext <8 x i16> %arg0 to <8 x i32> + %res = bitcast <8 x i32> %ext to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepi16_epi64: @@ -807,10 +805,10 @@ define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovsxwq %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %call = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %arg0) - ret <4 x i64> %call + %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %ext = sext <4 x i16> %shuf to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepi32_epi64: @@ -823,10 +821,9 @@ define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovsxdq %xmm0, %ymm0 ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> - %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %arg0) - ret <4 x i64> %res + %ext = sext <4 x i32> %arg0 to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu8_epi16: @@ -839,11 +836,10 @@ define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) { ; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %arg0) - %res = bitcast <16 x i16> %call to <4 x i64> + %ext = zext <16 x 
i8> %arg0 to <16 x i16> + %res = bitcast <16 x i16> %ext to <4 x i64> ret <4 x i64> %res } -declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu8_epi32: @@ -856,11 +852,11 @@ define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) { ; X64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %arg0) - %res = bitcast <8 x i32> %call to <4 x i64> + %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %ext = zext <8 x i8> %shuf to <8 x i32> + %res = bitcast <8 x i32> %ext to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu8_epi64: @@ -873,10 +869,10 @@ define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %call = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %arg0) - ret <4 x i64> %call + %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %ext = zext <4 x i8> %shuf to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu16_epi32: @@ -889,11 +885,10 @@ define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) 
{ ; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %call = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %arg0) - %res = bitcast <8 x i32> %call to <4 x i64> + %ext = zext <8 x i16> %arg0 to <8 x i32> + %res = bitcast <8 x i32> %ext to <4 x i64> ret <4 x i64> %res } -declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu16_epi64: @@ -906,10 +901,10 @@ define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %call = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %arg0) - ret <4 x i64> %call + %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %ext = zext <4 x i16> %shuf to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm256_cvtepu32_epi64: @@ -922,10 +917,9 @@ define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) { ; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> - %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %arg0) - ret <4 x i64> %res + %ext = zext <4 x i32> %arg0 to <4 x i64> + ret <4 x i64> %ext } -declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone define <2 x i64> @test_mm256_extracti128_si256(<4 x i64> %a0) nounwind { ; X32-LABEL: test_mm256_extracti128_si256: diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll index 
36b6da5ef96..95f18610585 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll @@ -203,3 +203,99 @@ define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) { ret <4 x i64> %res } declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly + + +define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) { +; CHECK: vpmovsxbd + %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) { +; CHECK: vpmovsxbq + %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) { +; CHECK: vpmovsxbw + %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) { +; CHECK: vpmovsxdq + %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) { +; CHECK: vpmovsxwd + %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) { +; CHECK: vpmovsxwq + %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) { 
+; CHECK: vpmovzxbd + %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) { +; CHECK: vpmovzxbq + %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone + + +define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) { +; CHECK: vpmovzxbw + %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1] + ret <16 x i16> %res +} +declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) { +; CHECK: vpmovzxdq + %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone + + +define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) { +; CHECK: vpmovzxwd + %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1] + ret <8 x i32> %res +} +declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone + + +define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) { +; CHECK: vpmovzxwq + %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1] + ret <4 x i64> %res +} +declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index 8e96df2cab5..b5c4dbcb777 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by update_llc_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-apple-darwin 
-mattr=avx2 | FileCheck %s --check-prefix=AVX2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=AVX512VL @@ -1078,198 +1077,6 @@ define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) { declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone -define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxbd: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovsxbd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxbd %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxbq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovsxbq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxbq %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone - - -define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxbw: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovsxbw: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] - ret <16 x i16> %res -} -declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxdq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: 
test_x86_avx2_pmovsxdq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxdq %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone - - -define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxwd: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovsxwd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovsxwq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovsxwq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovsxwq %xmm0, %ymm0 -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone - - -define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovzxbd: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxbd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <8 x i32> 
@llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovzxbq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxbq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone - - -define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovzxbw: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxbw: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512VL-NEXT: retl - %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1] - ret <16 x i16> %res -} -declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) { -; 
AVX2-LABEL: test_x86_avx2_pmovzxdq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxdq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone - - -define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovzxwd: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxwd: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX512VL-NEXT: retl - %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1] - ret <8 x i32> %res -} -declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone - - -define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) { -; AVX2-LABEL: test_x86_avx2_pmovzxwq: -; AVX2: ## BB#0: -; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-NEXT: retl -; -; AVX512VL-LABEL: test_x86_avx2_pmovzxwq: -; AVX512VL: ## BB#0: -; AVX512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512VL-NEXT: retl - %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1] - ret <4 x i64> %res -} -declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone - - define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) { 
%res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <4 x i64> %res @@ -1674,7 +1481,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ; AVX2-LABEL: test_x86_avx_storeu_dq_256: ; AVX2: ## BB#0: ; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX2-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0 +; AVX2-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0 ; AVX2-NEXT: vmovdqu %ymm0, (%eax) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retl @@ -1682,7 +1489,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) { ; AVX512VL-LABEL: test_x86_avx_storeu_dq_256: ; AVX512VL: ## BB#0: ; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; AVX512VL-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0 +; AVX512VL-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0 ; AVX512VL-NEXT: vmovdqu %ymm0, (%eax) ; AVX512VL-NEXT: retl %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> diff --git a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll index 6bd6a5041d4..f281bbaa675 100644 --- a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll @@ -1,10 +1,10 @@ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s -define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) { -; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw +define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) { +; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbw ; CHECK: vpmovsxbw (%rdi), %ymm0 %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1) + %2 = sext <16 x i8> %1 to <16 x i16> ret <16 x i16> %2 } @@ -12,23 +12,25 @@ define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd ; CHECK: vpmovsxbd (%rdi), %ymm0 %1 = load <16 
x i8>, <16 x i8>* %a, align 1 - %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1) - ret <8 x i32> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = sext <8 x i8> %2 to <8 x i32> + ret <8 x i32> %3 } define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq ; CHECK: vpmovsxbq (%rdi), %ymm0 %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1) - ret <4 x i64> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = sext <4 x i8> %2 to <4 x i64> + ret <4 x i64> %3 } define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd ; CHECK: vpmovsxwd (%rdi), %ymm0 %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1) + %2 = sext <8 x i16> %1 to <8 x i32> ret <8 x i32> %2 } @@ -36,23 +38,24 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq ; CHECK: vpmovsxwq (%rdi), %ymm0 %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1) - ret <4 x i64> %2 + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = sext <4 x i16> %2 to <4 x i64> + ret <4 x i64> %3 } define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq ; CHECK: vpmovsxdq (%rdi), %ymm0 %1 = load <4 x i32>, <4 x i32>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1) + %2 = sext <4 x i32> %1 to <4 x i64> ret <4 x i64> %2 } -define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) { -; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw +define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) { +; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbw ; CHECK: vpmovzxbw (%rdi), %ymm0 %1 = load <16 x i8>, 
<16 x i8>* %a, align 1 - %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1) + %2 = zext <16 x i8> %1 to <16 x i16> ret <16 x i16> %2 } @@ -60,23 +63,25 @@ define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd ; CHECK: vpmovzxbd (%rdi), %ymm0 %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1) - ret <8 x i32> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = zext <8 x i8> %2 to <8 x i32> + ret <8 x i32> %3 } define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq ; CHECK: vpmovzxbq (%rdi), %ymm0 %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1) - ret <4 x i64> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = zext <4 x i8> %2 to <4 x i64> + ret <4 x i64> %3 } define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd ; CHECK: vpmovzxwd (%rdi), %ymm0 %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1) + %2 = zext <8 x i16> %1 to <8 x i32> ret <8 x i32> %2 } @@ -84,27 +89,15 @@ define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq ; CHECK: vpmovzxwq (%rdi), %ymm0 %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1) - ret <4 x i64> %2 + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = zext <4 x i16> %2 to <4 x i64> + ret <4 x i64> %3 } define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) { ; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq ; CHECK: vpmovzxdq (%rdi), %ymm0 %1 = load <4 x i32>, <4 x i32>* %a, align 1 - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1) + %2 = zext 
<4 x i32> %1 to <4 x i64> ret <4 x i64> %2 } - -declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) -declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) -declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) -declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) -declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) -declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) -declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) -declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) -declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) -declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) -declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) -declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll index 03137ceaac0..7bfce0941a2 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -301,11 +301,11 @@ define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) { ; X64-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %zext = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %arg0) - %res = bitcast <8 x i16> %zext to <2 x i64> + %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %sext = zext <8 x i8> %ext0 to <8 x i16> + %res = bitcast <8 x i16> %sext to <2 x i64> ret <2 x i64> %res } -declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm_cvtepu8_epi32: @@ -318,11 +318,11 @@ define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) { ; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; X64-NEXT: retq %arg0 = 
bitcast <2 x i64> %a0 to <16 x i8> - %zext = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %arg0) - %res = bitcast <4 x i32> %zext to <2 x i64> + %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %sext = zext <4 x i8> %ext0 to <4 x i32> + %res = bitcast <4 x i32> %sext to <2 x i64> ret <2 x i64> %res } -declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm_cvtepu8_epi64: @@ -335,10 +335,10 @@ define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) { ; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> - %zext = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %arg0) - ret <2 x i64> %zext + %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1> + %sext = zext <2 x i8> %ext0 to <2 x i64> + ret <2 x i64> %sext } -declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) { ; X32-LABEL: test_mm_cvtepu16_epi32: @@ -351,11 +351,11 @@ define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) { ; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %zext = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %arg0) - %res = bitcast <4 x i32> %zext to <2 x i64> + %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %sext = zext <4 x i16> %ext0 to <4 x i32> + %res = bitcast <4 x i32> %sext to <2 x i64> ret <2 x i64> %res } -declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm_cvtepu16_epi64: @@ -368,10 +368,10 @@ define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) { ; X64-NEXT: 
pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <8 x i16> - %zext = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %arg0) - ret <2 x i64> %zext + %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> + %sext = zext <2 x i16> %ext0 to <2 x i64> + ret <2 x i64> %sext } -declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) { ; X32-LABEL: test_mm_cvtepu32_epi64: @@ -384,10 +384,10 @@ define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) { ; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <4 x i32> - %zext = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %arg0) - ret <2 x i64> %zext + %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %sext = zext <2 x i32> %ext0 to <2 x i64> + ret <2 x i64> %sext } -declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) { ; X32-LABEL: test_mm_dp_pd: diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll index 2c3c02baf97..72bf4395bb9 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll @@ -145,3 +145,69 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) { ret <2 x i64> %res } declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbd: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; CHECK-NEXT: retl + %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} 
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbq: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxbw: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: retl + %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxdq: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone + + +define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxwd: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; CHECK-NEXT: retl + %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %res +} +declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone + + +define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { +; CHECK-LABEL: test_x86_sse41_pmovzxwq: +; CHECK: ## BB#0: +; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero +; CHECK-NEXT: retl + %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %res +} +declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll index 6b4ea6b7c20..b8d058cc12e 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -284,102 +284,6 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) { declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone -define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxbd: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxbd: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; KNL-NEXT: retl - %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxbq: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxbq: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; KNL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone - 
- -define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxbw: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxbw: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; KNL-NEXT: retl - %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxdq: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxdq: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero -; KNL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone - - -define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxwd: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxwd: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; KNL-NEXT: retl - %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1] - ret <4 x i32> %res -} -declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone - - -define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) { -; SSE41-LABEL: test_x86_sse41_pmovzxwq: -; SSE41: ## BB#0: -; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; SSE41-NEXT: retl -; -; KNL-LABEL: test_x86_sse41_pmovzxwq: -; KNL: ## BB#0: -; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; KNL-NEXT: retl - %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1] - ret <2 x i64> %res -} -declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone - - define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { ; SSE41-LABEL: test_x86_sse41_pmuldq: ; SSE41: ## BB#0: diff --git a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll index a7e48d8ac03..756beb995c0 100644 --- a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll +++ b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll @@ -109,8 +109,9 @@ define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) { ; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX-NEXT: retq %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1) - ret <8 x i16> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = zext <8 x i8> %2 to <8 x i16> + ret <8 x i16> %3 } define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) { @@ -124,8 +125,9 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) { ; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX-NEXT: retq %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1) - ret <4 x i32> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = zext <4 x i8> %2 to <4 x i32> + ret <4 x i32> %3 } define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) { @@ -139,8 +141,9 @@ 
define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) { ; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero ; AVX-NEXT: retq %1 = load <16 x i8>, <16 x i8>* %a, align 1 - %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1) - ret <2 x i64> %2 + %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1> + %3 = zext <2 x i8> %2 to <2 x i64> + ret <2 x i64> %3 } define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) { @@ -154,8 +157,9 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) { ; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX-NEXT: retq %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1) - ret <4 x i32> %2 + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = zext <4 x i16> %2 to <4 x i32> + ret <4 x i32> %3 } define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) { @@ -169,8 +173,9 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) { ; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero ; AVX-NEXT: retq %1 = load <8 x i16>, <8 x i16>* %a, align 1 - %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1) - ret <2 x i64> %2 + %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1> + %3 = zext <2 x i16> %2 to <2 x i64> + ret <2 x i64> %3 } define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) { @@ -184,13 +189,7 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) { ; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero ; AVX-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %a, align 1 - %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1) - ret <2 x i64> %2 + %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1> + %3 = zext <2 x i32> %2 to <2 x i64> + ret <2 x i64> %3 } - -declare <2 x 
i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) -declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) -declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) -declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) -declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) -declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll index 897c749cc0e..ef7fa221714 100644 --- a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll +++ b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll @@ -662,19 +662,19 @@ define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovsxbd ;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) - ret <8 x i32> %2 + %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = sext <8 x i8> %2 to <8 x i32> + ret <8 x i32> %3 } -declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone define <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovsxbq ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) - ret <4 x i64> %2 + %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = sext <4 x i8> %2 to <4 x i64> + ret <4 x i64> %3 } -declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone define <16 x i16> 
@stack_fold_pmovsxbw(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovsxbw @@ -704,64 +704,61 @@ define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) { ;CHECK-LABEL: stack_fold_pmovsxwq ;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) - ret <4 x i64> %2 + %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = sext <4 x i16> %2 to <4 x i64> + ret <4 x i64> %3 } -declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone define <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovzxbd ;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) - ret <8 x i32> %2 + %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + %3 = zext <8 x i8> %2 to <8 x i32> + ret <8 x i32> %3 } -declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone define <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovzxbq ;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) - ret <4 x i64> %2 + %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 
+ %3 = zext <4 x i8> %2 to <4 x i64> + ret <4 x i64> %3 } -declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone define <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) { ;CHECK-LABEL: stack_fold_pmovzxbw ;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) + %2 = zext <16 x i8> %a0 to <16 x i16> ret <16 x i16> %2 } -declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone define <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) { ;CHECK-LABEL: stack_fold_pmovzxdq ;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) + %2 = zext <4 x i32> %a0 to <4 x i64> ret <4 x i64> %2 } -declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone define <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) { ;CHECK-LABEL: stack_fold_pmovzxwd ;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) + %2 = zext <8 x i16> %a0 to <8 x i32> ret <8 x i32> %2 } -declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone define <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) { ;CHECK-LABEL: stack_fold_pmovzxwq ;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload %1 = tail 
call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() - %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) - ret <4 x i64> %2 + %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %3 = zext <4 x i16> %2 to <4 x i64> + ret <4 x i64> %3 } -declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) { ;CHECK-LABEL: stack_fold_pmuldq diff --git a/llvm/test/Transforms/InstCombine/x86-pmovsx.ll b/llvm/test/Transforms/InstCombine/x86-pmovsx.ll deleted file mode 100644 index 52cf4124210..00000000000 --- a/llvm/test/Transforms/InstCombine/x86-pmovsx.ll +++ /dev/null @@ -1,70 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s - -declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone -declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone -declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone - -; -; Basic sign extension tests -; - -define <8 x i32> @avx2_pmovsxbd(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxbd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP2]] -; - %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %v) - ret <8 x i32> %res -} - -define <4 x i64> @avx2_pmovsxbq(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxbq( -; CHECK-NEXT: [[TMP1:%.*]] = 
shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %v) - ret <4 x i64> %res -} - -define <16 x i16> @avx2_pmovsxbw(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxbw( -; CHECK-NEXT: [[TMP1:%.*]] = sext <16 x i8> %v to <16 x i16> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %v) - ret <16 x i16> %res -} - -define <4 x i64> @avx2_pmovsxdq(<4 x i32> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxdq( -; CHECK-NEXT: [[TMP1:%.*]] = sext <4 x i32> %v to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %v) - ret <4 x i64> %res -} - -define <8 x i32> @avx2_pmovsxwd(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxwd( -; CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i16> %v to <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %v) - ret <8 x i32> %res -} - -define <4 x i64> @avx2_pmovsxwq(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovsxwq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %v) - ret <4 x i64> %res -} diff --git a/llvm/test/Transforms/InstCombine/x86-pmovzx.ll b/llvm/test/Transforms/InstCombine/x86-pmovzx.ll deleted file mode 100644 index 1853692d85b..00000000000 --- a/llvm/test/Transforms/InstCombine/x86-pmovzx.ll +++ /dev/null @@ -1,137 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instcombine -S | FileCheck %s - -declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind 
readnone -declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone -declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone -declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone -declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone - -declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone -declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone -declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone -declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone - -; -; Basic zero extension tests -; - -define <4 x i32> @sse41_pmovzxbd(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxbd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[TMP2]] -; - %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %v) - ret <4 x i32> %res -} - -define <2 x i64> @sse41_pmovzxbq(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxbq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[TMP2]] -; - %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %v) - ret <2 x i64> %res -} - -define <8 x i16> @sse41_pmovzxbw(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxbw( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16> -; CHECK-NEXT: ret <8 x i16> 
[[TMP2]] -; - %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %v) - ret <8 x i16> %res -} - -define <2 x i64> @sse41_pmovzxdq(<4 x i32> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxdq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[TMP2]] -; - %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %v) - ret <2 x i64> %res -} - -define <4 x i32> @sse41_pmovzxwd(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxwd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[TMP2]] -; - %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %v) - ret <4 x i32> %res -} - -define <2 x i64> @sse41_pmovzxwq(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @sse41_pmovzxwq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1> -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP1]] to <2 x i64> -; CHECK-NEXT: ret <2 x i64> [[TMP2]] -; - %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %v) - ret <2 x i64> %res -} - -define <8 x i32> @avx2_pmovzxbd(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxbd( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> -; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP2]] -; - %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %v) - ret <8 x i32> %res -} - -define <4 x i64> @avx2_pmovzxbq(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxbq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x 
i8> [[TMP1]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %v) - ret <4 x i64> %res -} - -define <16 x i16> @avx2_pmovzxbw(<16 x i8> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxbw( -; CHECK-NEXT: [[TMP1:%.*]] = zext <16 x i8> %v to <16 x i16> -; CHECK-NEXT: ret <16 x i16> [[TMP1]] -; - %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %v) - ret <16 x i16> %res -} - -define <4 x i64> @avx2_pmovzxdq(<4 x i32> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxdq( -; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i32> %v to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP1]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %v) - ret <4 x i64> %res -} - -define <8 x i32> @avx2_pmovzxwd(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxwd( -; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i16> %v to <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[TMP1]] -; - %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %v) - ret <8 x i32> %res -} - -define <4 x i64> @avx2_pmovzxwq(<8 x i16> %v) nounwind readnone { -; CHECK-LABEL: @avx2_pmovzxwq( -; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> -; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i64> -; CHECK-NEXT: ret <4 x i64> [[TMP2]] -; - %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %v) - ret <4 x i64> %res -} |

