author    Simon Pilgrim <llvm-dev@redking.me.uk>  2016-05-27 09:02:25 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk>  2016-05-27 09:02:25 +0000
commit    4642a57fbfda606d6af3da4f0ee5c17c96cfcce5 (patch)
tree      9e974b951d5d21527747287cee31f89993d22637 /llvm/test/CodeGen
parent    99302edd1c63639204f0f1a8799384995dba4ca1 (diff)
Revert: r270973 - [X86][SSE] Replace (V)PMOVSX and (V)PMOVZX integer extension intrinsics with generic IR (llvm)
llvm-svn: 270976
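
The reverted patch had auto-upgraded these x86 sign/zero-extension intrinsics into generic IR (a shufflevector taking the low elements, followed by sext/zext); this revert restores the direct intrinsic calls throughout the tests below. A minimal sketch of the two equivalent forms for the SSE4.1 PMOVZXBD case, as they appear in the hunks of this diff (value names are illustrative):

    ; Intrinsic form, restored by this revert:
    %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0)

    ; Generic IR form from r270973, removed here:
    %lo  = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    %res = zext <4 x i8> %lo to <4 x i32>

Both forms select to the same (v)pmovzxbd instruction, which is why the CHECK lines in the hunks below are largely unchanged while the IR bodies differ.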
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll    |  66
-rw-r--r--  llvm/test/CodeGen/X86/avx-intrinsics-x86.ll            | 106
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll     |  66
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll   |  96
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll           | 197
-rw-r--r--  llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll       |  63
-rw-r--r--  llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll    |  36
-rw-r--r--  llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll  |  66
-rw-r--r--  llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll          |  96
-rw-r--r--  llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll      |  37
-rw-r--r--  llvm/test/CodeGen/X86/stack-folding-int-avx2.ll        |  45
11 files changed, 524 insertions(+), 350 deletions(-)
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
index 8d03784ce1b..862e9378afe 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86-upgrade.ll
@@ -247,72 +247,6 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbd:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbw:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT: retl
- %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxdq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxwd:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxwq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
-
-
define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; AVX-LABEL: test_x86_sse2_cvtdq2pd:
; AVX: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
index 84f8f3cd150..ac8be0fa2e2 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx,aes,pclmul | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl,aes,pclmul | FileCheck %s --check-prefix=AVX512VL
@@ -1800,6 +1800,102 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxbd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxbq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxbw:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512VL-NEXT: retl
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxdq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX512VL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxwd:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
+; AVX-LABEL: test_x86_sse41_pmovzxwq:
+; AVX: ## BB#0:
+; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; AVX-LABEL: test_x86_sse41_pmuldq:
; AVX: ## BB#0:
@@ -4030,7 +4126,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI225_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb LCPI231_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
@@ -4271,7 +4367,7 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) {
;
; AVX512VL-LABEL: test_x86_avx_vpermilvar_pd_256_2:
; AVX512VL: ## BB#0:
-; AVX512VL-NEXT: vpermilpd LCPI239_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpermilpd LCPI245_0, %ymm0, %ymm0
; AVX512VL-NEXT: retl
%res = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 0, i64 2>) ; <<4 x double>> [#uses=1]
ret <4 x double> %res
@@ -4763,7 +4859,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX-LABEL: movnt_dq:
; AVX: ## BB#0:
; AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
+; AVX-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX-NEXT: vmovntdq %ymm0, (%eax)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retl
@@ -4771,7 +4867,7 @@ define void @movnt_dq(i8* %p, <2 x i64> %a1) nounwind {
; AVX512VL-LABEL: movnt_dq:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddq LCPI266_0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpaddq LCPI272_0, %xmm0, %xmm0
; AVX512VL-NEXT: vmovntdq %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <2 x i64> %a1, <i64 1, i64 1>
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
index aad7e8b5fbf..684412e38e8 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-fast-isel.ll
@@ -740,10 +740,11 @@ define <4 x i64> @test_mm256_cvtepi8_epi16(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbw %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %ext = sext <16 x i8> %arg0 to <16 x i16>
- %res = bitcast <16 x i16> %ext to <4 x i64>
+ %call = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %arg0)
+ %res = bitcast <16 x i16> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi8_epi32:
@@ -756,11 +757,11 @@ define <4 x i64> @test_mm256_cvtepi8_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbd %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %ext = sext <8 x i8> %shuf to <8 x i32>
- %res = bitcast <8 x i32> %ext to <4 x i64>
+ %call = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %arg0)
+ %res = bitcast <8 x i32> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi8_epi64:
@@ -773,10 +774,10 @@ define <4 x i64> @test_mm256_cvtepi8_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxbq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %ext = sext <4 x i8> %shuf to <4 x i64>
- ret <4 x i64> %ext
+ %call = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %arg0)
+ ret <4 x i64> %call
}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi16_epi32:
@@ -789,10 +790,11 @@ define <4 x i64> @test_mm256_cvtepi16_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovsxwd %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %ext = sext <8 x i16> %arg0 to <8 x i32>
- %res = bitcast <8 x i32> %ext to <4 x i64>
+ %call = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %arg0)
+ %res = bitcast <8 x i32> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi16_epi64:
@@ -805,10 +807,10 @@ define <4 x i64> @test_mm256_cvtepi16_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxwq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %ext = sext <4 x i16> %shuf to <4 x i64>
- ret <4 x i64> %ext
+ %call = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %arg0)
+ ret <4 x i64> %call
}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepi32_epi64:
@@ -821,9 +823,10 @@ define <4 x i64> @test_mm256_cvtepi32_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovsxdq %xmm0, %ymm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
- %ext = sext <4 x i32> %arg0 to <4 x i64>
- ret <4 x i64> %ext
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %arg0)
+ ret <4 x i64> %res
}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi16:
@@ -836,10 +839,11 @@ define <4 x i64> @test_mm256_cvtepu8_epi16(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %ext = zext <16 x i8> %arg0 to <16 x i16>
- %res = bitcast <16 x i16> %ext to <4 x i64>
+ %call = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %arg0)
+ %res = bitcast <16 x i16> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi32:
@@ -852,11 +856,11 @@ define <4 x i64> @test_mm256_cvtepu8_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %ext = zext <8 x i8> %shuf to <8 x i32>
- %res = bitcast <8 x i32> %ext to <4 x i64>
+ %call = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %arg0)
+ %res = bitcast <8 x i32> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu8_epi64:
@@ -869,10 +873,10 @@ define <4 x i64> @test_mm256_cvtepu8_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %shuf = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %ext = zext <4 x i8> %shuf to <4 x i64>
- ret <4 x i64> %ext
+ %call = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %arg0)
+ ret <4 x i64> %call
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu16_epi32:
@@ -885,10 +889,11 @@ define <4 x i64> @test_mm256_cvtepu16_epi32(<2 x i64> %a0) {
; X64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %ext = zext <8 x i16> %arg0 to <8 x i32>
- %res = bitcast <8 x i32> %ext to <4 x i64>
+ %call = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %arg0)
+ %res = bitcast <8 x i32> %call to <4 x i64>
ret <4 x i64> %res
}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu16_epi64:
@@ -901,10 +906,10 @@ define <4 x i64> @test_mm256_cvtepu16_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %shuf = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %ext = zext <4 x i16> %shuf to <4 x i64>
- ret <4 x i64> %ext
+ %call = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %arg0)
+ ret <4 x i64> %call
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm256_cvtepu32_epi64:
@@ -917,9 +922,10 @@ define <4 x i64> @test_mm256_cvtepu32_epi64(<2 x i64> %a0) {
; X64-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
- %ext = zext <4 x i32> %arg0 to <4 x i64>
- ret <4 x i64> %ext
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %arg0)
+ ret <4 x i64> %res
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
define <2 x i64> @test_mm256_extracti128_si256(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm256_extracti128_si256:
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
index 95f18610585..36b6da5ef96 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -203,99 +203,3 @@ define <4 x i64> @test_x86_avx2_pbroadcastq_256(<2 x i64> %a0) {
ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pbroadcastq.256(<2 x i64>) nounwind readonly
-
-
-define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
-; CHECK: vpmovsxbd
- %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
-; CHECK: vpmovsxbq
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
-; CHECK: vpmovsxbw
- %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
-; CHECK: vpmovsxdq
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
-; CHECK: vpmovsxwd
- %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
-; CHECK: vpmovsxwq
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
-; CHECK: vpmovzxbd
- %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
-; CHECK: vpmovzxbq
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
-; CHECK: vpmovzxbw
- %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
-; CHECK: vpmovzxdq
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
-
-
-define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
-; CHECK: vpmovzxwd
- %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
- ret <8 x i32> %res
-}
-declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
-
-
-define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
-; CHECK: vpmovzxwq
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
- ret <4 x i64> %res
-}
-declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index b5c4dbcb777..8e96df2cab5 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 | FileCheck %s --check-prefix=AVX2
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=AVX512VL
@@ -1077,6 +1078,198 @@ define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone
+define <8 x i32> @test_x86_avx2_pmovsxbd(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxbd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxbd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxbd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxbq(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxbq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxbq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxbq %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovsxbw(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxbw:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxbw:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxdq(<4 x i32> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxdq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxdq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxdq %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovsxwd(<8 x i16> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxwd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxwd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovsxwq(<8 x i16> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovsxwq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovsxwq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovsxwq %xmm0, %ymm0
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxbd(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxbd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxbd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxbq(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxbq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxbq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_pmovzxbw(<16 x i8> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxbw:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxbw:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512VL-NEXT: retl
+ %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxdq(<4 x i32> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxdq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxdq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <8 x i32> @test_x86_avx2_pmovzxwd(<8 x i16> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxwd:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxwd:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512VL-NEXT: retl
+ %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0) ; <<8 x i32>> [#uses=1]
+ ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <4 x i64> @test_x86_avx2_pmovzxwq(<8 x i16> %a0) {
+; AVX2-LABEL: test_x86_avx2_pmovzxwq:
+; AVX2: ## BB#0:
+; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX2-NEXT: retl
+;
+; AVX512VL-LABEL: test_x86_avx2_pmovzxwq:
+; AVX512VL: ## BB#0:
+; AVX512VL-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX512VL-NEXT: retl
+ %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0) ; <<4 x i64>> [#uses=1]
+ ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
+
+
define <4 x i64> @test_x86_avx2_pmul.dq(<8 x i32> %a0, <8 x i32> %a1) {
%res = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> %a1) ; <<2 x i64>> [#uses=1]
ret <4 x i64> %res
@@ -1481,7 +1674,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX2-LABEL: test_x86_avx_storeu_dq_256:
; AVX2: ## BB#0:
; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX2-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
+; AVX2-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%eax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retl
@@ -1489,7 +1682,7 @@ define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
; AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
; AVX512VL: ## BB#0:
; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; AVX512VL-NEXT: vpaddb LCPI91_0, %ymm0, %ymm0
+; AVX512VL-NEXT: vpaddb LCPI103_0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqu %ymm0, (%eax)
; AVX512VL-NEXT: retl
%a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
diff --git a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
index f281bbaa675..6bd6a5041d4 100644
--- a/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx2-pmovxrm-intrinsics.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx2 | FileCheck %s
-define <16 x i16> @test_llvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbw
+define <16 x i16> @test_lvm_x86_avx2_pmovsxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_lvm_x86_avx2_pmovsxbw
; CHECK: vpmovsxbw (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = sext <16 x i8> %1 to <16 x i16>
+ %2 = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %1)
ret <16 x i16> %2
}
@@ -12,25 +12,23 @@ define <8 x i32> @test_llvm_x86_avx2_pmovsxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbd
; CHECK: vpmovsxbd (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = sext <8 x i8> %2 to <8 x i32>
- ret <8 x i32> %3
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %1)
+ ret <8 x i32> %2
}
define <4 x i64> @test_llvm_x86_avx2_pmovsxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxbq
; CHECK: vpmovsxbq (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i8> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %1)
+ ret <4 x i64> %2
}
define <8 x i32> @test_llvm_x86_avx2_pmovsxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwd
; CHECK: vpmovsxwd (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = sext <8 x i16> %1 to <8 x i32>
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %1)
ret <8 x i32> %2
}
@@ -38,24 +36,23 @@ define <4 x i64> @test_llvm_x86_avx2_pmovsxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxwq
; CHECK: vpmovsxwq (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i16> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %1)
+ ret <4 x i64> %2
}
define <4 x i64> @test_llvm_x86_avx2_pmovsxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovsxdq
; CHECK: vpmovsxdq (%rdi), %ymm0
%1 = load <4 x i32>, <4 x i32>* %a, align 1
- %2 = sext <4 x i32> %1 to <4 x i64>
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %1)
ret <4 x i64> %2
}
-define <16 x i16> @test_llvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
-; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbw
+define <16 x i16> @test_lvm_x86_avx2_pmovzxbw(<16 x i8>* %a) {
+; CHECK-LABEL: test_lvm_x86_avx2_pmovzxbw
; CHECK: vpmovzxbw (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = zext <16 x i8> %1 to <16 x i16>
+ %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %1)
ret <16 x i16> %2
}
@@ -63,25 +60,23 @@ define <8 x i32> @test_llvm_x86_avx2_pmovzxbd(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbd
; CHECK: vpmovzxbd (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = zext <8 x i8> %2 to <8 x i32>
- ret <8 x i32> %3
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %1)
+ ret <8 x i32> %2
}
define <4 x i64> @test_llvm_x86_avx2_pmovzxbq(<16 x i8>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxbq
; CHECK: vpmovzxbq (%rdi), %ymm0
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i8> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %1)
+ ret <4 x i64> %2
}
define <8 x i32> @test_llvm_x86_avx2_pmovzxwd(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwd
; CHECK: vpmovzxwd (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = zext <8 x i16> %1 to <8 x i32>
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %1)
ret <8 x i32> %2
}
@@ -89,15 +84,27 @@ define <4 x i64> @test_llvm_x86_avx2_pmovzxwq(<8 x i16>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxwq
; CHECK: vpmovzxwq (%rdi), %ymm0
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i16> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %1)
+ ret <4 x i64> %2
}
define <4 x i64> @test_llvm_x86_avx2_pmovzxdq(<4 x i32>* %a) {
; CHECK-LABEL: test_llvm_x86_avx2_pmovzxdq
; CHECK: vpmovzxdq (%rdi), %ymm0
%1 = load <4 x i32>, <4 x i32>* %a, align 1
- %2 = zext <4 x i32> %1 to <4 x i64>
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %1)
ret <4 x i64> %2
}
+
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>)
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>)
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>)
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>)
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>)
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>)
+declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>)
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>)
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>)
+declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>)
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
index 7bfce0941a2..03137ceaac0 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll
@@ -301,11 +301,11 @@ define <2 x i64> @test_mm_cvtepu8_epi16(<2 x i64> %a0) {
; X64-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %sext = zext <8 x i8> %ext0 to <8 x i16>
- %res = bitcast <8 x i16> %sext to <2 x i64>
+ %zext = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %arg0)
+ %res = bitcast <8 x i16> %zext to <2 x i64>
ret <2 x i64> %res
}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi32:
@@ -318,11 +318,11 @@ define <2 x i64> @test_mm_cvtepu8_epi32(<2 x i64> %a0) {
; X64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %sext = zext <4 x i8> %ext0 to <4 x i32>
- %res = bitcast <4 x i32> %sext to <2 x i64>
+ %zext = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %arg0)
+ %res = bitcast <4 x i32> %zext to <2 x i64>
ret <2 x i64> %res
}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu8_epi64:
@@ -335,10 +335,10 @@ define <2 x i64> @test_mm_cvtepu8_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
- %ext0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %sext = zext <2 x i8> %ext0 to <2 x i64>
- ret <2 x i64> %sext
+ %zext = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %arg0)
+ ret <2 x i64> %zext
}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi32:
@@ -351,11 +351,11 @@ define <2 x i64> @test_mm_cvtepu16_epi32(<2 x i64> %a0) {
; X64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %sext = zext <4 x i16> %ext0 to <4 x i32>
- %res = bitcast <4 x i32> %sext to <2 x i64>
+ %zext = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %arg0)
+ %res = bitcast <4 x i32> %zext to <2 x i64>
ret <2 x i64> %res
}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu16_epi64:
@@ -368,10 +368,10 @@ define <2 x i64> @test_mm_cvtepu16_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
- %ext0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
- %sext = zext <2 x i16> %ext0 to <2 x i64>
- ret <2 x i64> %sext
+ %zext = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %arg0)
+ ret <2 x i64> %zext
}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_cvtepu32_epi64:
@@ -384,10 +384,10 @@ define <2 x i64> @test_mm_cvtepu32_epi64(<2 x i64> %a0) {
; X64-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
- %ext0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %sext = zext <2 x i32> %ext0 to <2 x i64>
- ret <2 x i64> %sext
+ %zext = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %arg0)
+ ret <2 x i64> %zext
}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
define <2 x double> @test_mm_dp_pd(<2 x double> %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_dp_pd:
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
index 72bf4395bb9..2c3c02baf97 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86-upgrade.ll
@@ -145,69 +145,3 @@ define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbd:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
-
-
-define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxbw:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; CHECK-NEXT: retl
- %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
- ret <8 x i16> %res
-}
-declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxdq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
-
-
-define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxwd:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; CHECK-NEXT: retl
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %res
-}
-declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-
-
-define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
-; CHECK-LABEL: test_x86_sse41_pmovzxwq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
-; CHECK-NEXT: retl
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
- ret <2 x i64> %res
-}
-declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
index b8d058cc12e..6b4ea6b7c20 100644
--- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll
@@ -284,6 +284,102 @@ define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone
+define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxbd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxbd:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; KNL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxbq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxbq:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
+; KNL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
+
+
+define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxbw:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxbw:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; KNL-NEXT: retl
+ %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxdq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxdq:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; KNL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
+
+
+define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxwd:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxwd:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; KNL-NEXT: retl
+ %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
+
+
+define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
+; SSE41-LABEL: test_x86_sse41_pmovzxwq:
+; SSE41: ## BB#0:
+; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; SSE41-NEXT: retl
+;
+; KNL-LABEL: test_x86_sse41_pmovzxwq:
+; KNL: ## BB#0:
+; KNL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
+; KNL-NEXT: retl
+ %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
+
+
define <2 x i64> @test_x86_sse41_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE41-LABEL: test_x86_sse41_pmuldq:
; SSE41: ## BB#0:
diff --git a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
index 756beb995c0..a7e48d8ac03 100644
--- a/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/sse41-pmovxrm-intrinsics.ll
@@ -109,9 +109,8 @@ define <8 x i16> @test_llvm_x86_sse41_pmovzxbw(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = zext <8 x i8> %2 to <8 x i16>
- ret <8 x i16> %3
+ %2 = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %1)
+ ret <8 x i16> %2
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
@@ -125,9 +124,8 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxbd(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i8> %2 to <4 x i32>
- ret <4 x i32> %3
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %1)
+ ret <4 x i32> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
@@ -141,9 +139,8 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxbq(<16 x i8>* %a) {
; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; AVX-NEXT: retq
%1 = load <16 x i8>, <16 x i8>* %a, align 1
- %2 = shufflevector <16 x i8> %1, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
- %3 = zext <2 x i8> %2 to <2 x i64>
- ret <2 x i64> %3
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %1)
+ ret <2 x i64> %2
}
define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
@@ -157,9 +154,8 @@ define <4 x i32> @test_llvm_x86_sse41_pmovzxwd(<8 x i16>* %a) {
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i16> %2 to <4 x i32>
- ret <4 x i32> %3
+ %2 = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %1)
+ ret <4 x i32> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
@@ -173,9 +169,8 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxwq(<8 x i16>* %a) {
; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; AVX-NEXT: retq
%1 = load <8 x i16>, <8 x i16>* %a, align 1
- %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
- %3 = zext <2 x i16> %2 to <2 x i64>
- ret <2 x i64> %3
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %1)
+ ret <2 x i64> %2
}
define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
@@ -189,7 +184,13 @@ define <2 x i64> @test_llvm_x86_sse41_pmovzxdq(<4 x i32>* %a) {
; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero
; AVX-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %a, align 1
- %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
- %3 = zext <2 x i32> %2 to <2 x i64>
- ret <2 x i64> %3
+ %2 = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %1)
+ ret <2 x i64> %2
}
+
+declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>)
+declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>)
+declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>)
+declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>)
+declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>)
+declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>)
diff --git a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
index ef7fa221714..897c749cc0e 100644
--- a/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-int-avx2.ll
@@ -662,19 +662,19 @@ define <8 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbd
;CHECK: vpmovsxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = sext <8 x i8> %2 to <8 x i32>
- ret <8 x i32> %3
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %a0)
+ ret <8 x i32> %2
}
+declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
define <4 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbq
;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i8> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %a0)
+ ret <4 x i64> %2
}
+declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
define <16 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovsxbw
@@ -704,61 +704,64 @@ define <4 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovsxwq
;CHECK: vpmovsxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = sext <4 x i16> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %a0)
+ ret <4 x i64> %2
}
+declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
define <8 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbd
;CHECK: vpmovzxbd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %3 = zext <8 x i8> %2 to <8 x i32>
- ret <8 x i32> %3
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %a0)
+ ret <8 x i32> %2
}
+declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
define <4 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbq
;CHECK: vpmovzxbq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i8> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %a0)
+ ret <4 x i64> %2
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
define <16 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
;CHECK-LABEL: stack_fold_pmovzxbw
;CHECK: vpmovzxbw {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = zext <16 x i8> %a0 to <16 x i16>
+ %2 = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %a0)
ret <16 x i16> %2
}
+declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
define <4 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
;CHECK-LABEL: stack_fold_pmovzxdq
;CHECK: vpmovzxdq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = zext <4 x i32> %a0 to <4 x i64>
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %a0)
ret <4 x i64> %2
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
define <8 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovzxwd
;CHECK: vpmovzxwd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = zext <8 x i16> %a0 to <8 x i32>
+ %2 = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %a0)
ret <8 x i32> %2
}
+declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
define <4 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
;CHECK-LABEL: stack_fold_pmovzxwq
;CHECK: vpmovzxwq {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
- %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- %3 = zext <4 x i16> %2 to <4 x i64>
- ret <4 x i64> %3
+ %2 = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %a0)
+ ret <4 x i64> %2
}
+declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
define <4 x i64> @stack_fold_pmuldq(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_pmuldq