author     Igor Breger <igor.breger@intel.com>  2015-09-09 14:35:09 +0000
committer  Igor Breger <igor.breger@intel.com>  2015-09-09 14:35:09 +0000
commit     ac29a8292193c623fb81c80c6995836df415b935 (patch)
tree       7968a4f7307fc58bef41ee8836f2af87d68661da /llvm/test/CodeGen
parent     2fbab9d89351e52e84b412371628754217259a22 (diff)
AVX512: Implemented encoding and intrinsics for vextracti64x4, vextracti64x2, vextracti32x8, vextracti32x4, vextractf64x4, vextractf64x2, vextractf32x8, and vextractf32x4. Added tests for the intrinsics and the encoding.

Differential Revision: http://reviews.llvm.org/D11802

llvm-svn: 247149
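
For illustration only, a minimal sketch of how one of these extract intrinsics is invoked from LLVM IR, reusing the declaration added in avx512-intrinsics.ll below; the function name @extract_example and the lane index 2 are placeholders, and the all-ones i8 -1 mask with a zeroinitializer pass-through gives the unmasked form:

declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)

define <4 x float> @extract_example(<16 x float> %a) {
  ; extract 128-bit lane 2 of the 512-bit source; i8 -1 means all mask bits set (unmasked)
  %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> zeroinitializer, i8 -1)
  ret <4 x float> %res
}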
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/avx512-cvt.ll             18
-rw-r--r--  llvm/test/CodeGen/X86/avx512-insert-extract.ll  46
-rw-r--r--  llvm/test/CodeGen/X86/avx512-intrinsics.ll      16
-rw-r--r--  llvm/test/CodeGen/X86/avx512dq-intrinsics.ll    41
-rw-r--r--  llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll  20
-rw-r--r--  llvm/test/CodeGen/X86/avx512vl-intrinsics.ll    20
6 files changed, 132 insertions, 29 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index aa34076c910..586a2954501 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -52,14 +52,14 @@ define <4 x i64> @f32tosl(<4 x float> %a) {
}
; CHECK-LABEL: sltof432
-; CHECK: vcvtqq2ps
+; CHECK: vcvtqq2ps
define <4 x float> @sltof432(<4 x i64> %a) {
%b = sitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
}
; CHECK-LABEL: ultof432
-; CHECK: vcvtuqq2ps
+; CHECK: vcvtuqq2ps
define <4 x float> @ultof432(<4 x i64> %a) {
%b = uitofp <4 x i64> %a to <4 x float>
ret <4 x float> %b
@@ -279,12 +279,14 @@ define i32 @float_to_int(float %x) {
ret i32 %res
}
-; CHECK-LABEL: uitof64
-; CHECK: vcvtudq2pd
-; CHECK: vextracti64x4
-; CHECK: vcvtudq2pd
-; CHECK: ret
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
+; CHECK-LABEL: uitof64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm2
+; CHECK-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm1
+; CHECK-NEXT: vmovaps %zmm2, %zmm0
+; CHECK-NEXT: retq
%b = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %b
}
@@ -407,7 +409,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
}
; CHECK-LABEL: @uitofp_16i8
-; CHECK: vpmovzxbd
+; CHECK: vpmovzxbd
; CHECK: vcvtudq2ps
define <16 x float> @uitofp_16i8(<16 x i8>%a) {
%b = uitofp <16 x i8> %a to <16 x float>
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 6f985f0bf3a..52398acd48f 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -12,14 +12,24 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind {
ret <16 x float> %rrr3
}
-;CHECK-LABEL: test2:
-;KNL: vinsertf32x4 $0
-;SKX: vinsertf64x2 $0
-;CHECK: vextractf32x4 $3
-;KNL: vinsertf32x4 $3
-;SKX: vinsertf64x2 $3
-;CHECK: ret
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
+; KNL-LABEL: test2:
+; KNL: ## BB#0:
+; KNL-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
+; KNL-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm0
+; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm2
+; KNL-NEXT: vmovsd %xmm1, %xmm2, %xmm1
+; KNL-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test2:
+; SKX: ## BB#0:
+; SKX-NEXT: vmovhpd (%rdi), %xmm0, %xmm2
+; SKX-NEXT: vinsertf64x2 $0, %xmm2, %zmm0, %zmm0
+; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm2
+; SKX-NEXT: vmovsd %xmm1, %xmm2, %xmm1
+; SKX-NEXT: vinsertf64x2 $3, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
%rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
@@ -36,12 +46,22 @@ define <16 x float> @test3(<16 x float> %x) nounwind {
ret <16 x float> %rrr2
}
-;CHECK-LABEL: test4:
-;CHECK: vextracti32x4 $2
-;KNL: vinserti32x4 $0
-;SKX: vinserti64x2 $0
-;CHECK: ret
define <8 x i64> @test4(<8 x i64> %x) nounwind {
+; KNL-LABEL: test4:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; KNL-NEXT: vmovq %xmm1, %rax
+; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test4:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti64x2 $2, %zmm0, %xmm1
+; SKX-NEXT: vmovq %xmm1, %rax
+; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
%eee = extractelement <8 x i64> %x, i32 4
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
ret <8 x i64> %rrr2
@@ -142,7 +162,7 @@ define i64 @test12(<16 x i64>%a, <16 x i64>%b, i64 %a1, i64 %b1) {
;CHECK: andl $1, %eax
;CHECK: kmovw %eax, %k0
;CHECK: movw $-4
-;CHECK: korw
+;CHECK: korw
define i16 @test13(i32 %a, i32 %b) {
%cmp_res = icmp ult i32 %a, %b
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %cmp_res, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6d18b25b770..4846f760cb7 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -911,38 +911,38 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounw
define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextractf32x4:
; CHECK: vextractf32x4 $2, %zmm1, %xmm0 {%k1}
- %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i8 2, <4 x float> %b, i8 %mask)
+ %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
ret <4 x float> %res
}
-declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i8, <4 x float>, i8)
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
; CHECK-LABEL: test_mask_vextracti64x4:
; CHECK: vextracti64x4 $2, %zmm1, %ymm0 {%k1}
- %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i8 2, <4 x i64> %b, i8 %mask)
+ %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 2, <4 x i64> %b, i8 %mask)
ret <4 x i64> %res
}
-declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i8, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
; CHECK-LABEL: test_maskz_vextracti32x4:
; CHECK: vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z}
- %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i8 2, <4 x i32> zeroinitializer, i8 %mask)
+ %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
ret <4 x i32> %res
}
-declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i8, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
define <4 x double> @test_vextractf64x4(<8 x double> %a) {
; CHECK-LABEL: test_vextractf64x4:
; CHECK: vextractf64x4 $2, %zmm0, %ymm0 ##
- %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i8 2, <4 x double> zeroinitializer, i8 -1)
+ %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 2, <4 x double> zeroinitializer, i8 -1)
ret <4 x double> %res
}
-declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i8, <4 x double>, i8)
+declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_d
diff --git a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
index 67a88e15570..c1335096ab2 100644
--- a/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dq-intrinsics.ll
@@ -315,3 +315,44 @@ define <2 x double>@test_int_x86_avx512_mask_range_sd(<2 x double> %x0, <2 x dou
%res2 = fadd <2 x double> %res, %res1
ret <2 x double> %res2
}
+
+
+declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.512(<8 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res2, %res3
+ ret <2 x double> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> %x2, i8 %x3)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 %x3)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.vextractf32x8.512(<16 x float> %x0,i32 1, <8 x float> zeroinitializer, i8 -1)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
index 2fcfac0f1bb..8956f279acb 100644
--- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll
@@ -1648,3 +1648,23 @@ define <8 x float>@test_int_x86_avx512_mask_range_ps_256(<8 x float> %x0, <8 x f
%res2 = fadd <8 x float> %res, %res1
ret <8 x float> %res2
}
+
+declare <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double>, i32, <2 x double>, i8)
+
+define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_256(<4 x double> %x0, <2 x double> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf64x2 $1, %ymm0, %xmm0
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddpd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> %x2, i8 %x3)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 %x3)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.vextractf64x2.256(<4 x double> %x0,i32 1, <2 x double> zeroinitializer, i8 -1)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index b148db68092..1f0bacb0186 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -4508,6 +4508,26 @@ define <8 x float>@test_int_x86_avx512_mask_rndscale_ps_256(<8 x float> %x0, <8
ret <8 x float> %res2
}
+declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm1 {%k1}
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vextractf32x4 $1, %ymm0, %xmm0
+; CHECK-NEXT: vaddps %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
declare <2 x double> @llvm.x86.avx512.mask.getmant.pd.128(<2 x double>, i32, <2 x double>, i8)
define <2 x double>@test_int_x86_avx512_mask_getmant_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {