summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJatin Bhateja <jatin.bhateja@gmail.com>2017-08-26 19:02:49 +0000
committerJatin Bhateja <jatin.bhateja@gmail.com>2017-08-26 19:02:49 +0000
commitc2f41b9f0b3e5982f0b0ee41589db79486d190be (patch)
tree6ed09d648ed75acf3cd6f5ed124c90fa70baf016
parente4ca95d6aae7f25dd78bab7a49ec6f69fae66a61 (diff)
downloadbcm5719-llvm-c2f41b9f0b3e5982f0b0ee41589db79486d190be.tar.gz
bcm5719-llvm-c2f41b9f0b3e5982f0b0ee41589db79486d190be.zip
[X86] Adding a test for horizontal [f]add/[f]sub for avx512 vector type 16x32.
Differential Revision: https://reviews.llvm.org/D37183 llvm-svn: 311834
-rw-r--r--llvm/test/CodeGen/X86/avx512-hadd-hsub.ll112
1 files changed, 112 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
new file mode 100644
index 00000000000..e15492b6866
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
+
+define i32 @hadd_16(<16 x i32> %x225) {
+; KNL-LABEL: hadd_16:
+; KNL: # BB#0:
+; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: hadd_16:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vmovd %xmm0, %eax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+ %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x227 = add <16 x i32> %x225, %x226
+ %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x229 = add <16 x i32> %x227, %x228
+ %x230 = extractelement <16 x i32> %x229, i32 0
+ ret i32 %x230
+}
+
+define i32 @hsub_16(<16 x i32> %x225) {
+; KNL-LABEL: hsub_16:
+; KNL: # BB#0:
+; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; KNL-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; KNL-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vmovd %xmm0, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: hsub_16:
+; SKX: # BB#0:
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SKX-NEXT: vpaddd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; SKX-NEXT: vpsubd %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vmovd %xmm0, %eax
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+ %x226 = shufflevector <16 x i32> %x225, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x227 = add <16 x i32> %x225, %x226
+ %x228 = shufflevector <16 x i32> %x227, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x229 = sub <16 x i32> %x227, %x228
+ %x230 = extractelement <16 x i32> %x229, i32 0
+ ret i32 %x230
+}
+
+define float @fhadd_16(<16 x float> %x225) {
+; KNL-LABEL: fhadd_16:
+; KNL: # BB#0:
+; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: fhadd_16:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+ %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x227 = fadd <16 x float> %x225, %x226
+ %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x229 = fadd <16 x float> %x227, %x228
+ %x230 = extractelement <16 x float> %x229, i32 0
+ ret float %x230
+}
+
+define float @fhsub_16(<16 x float> %x225) {
+; KNL-LABEL: fhsub_16:
+; KNL: # BB#0:
+; KNL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; KNL-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; KNL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; KNL-NEXT: vsubps %zmm1, %zmm0, %zmm0
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: retq
+;
+; SKX-LABEL: fhsub_16:
+; SKX: # BB#0:
+; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
+; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SKX-NEXT: vsubps %zmm1, %zmm0, %zmm0
+; SKX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; SKX-NEXT: vzeroupper
+; SKX-NEXT: retq
+ %x226 = shufflevector <16 x float> %x225, <16 x float> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x227 = fadd <16 x float> %x225, %x226
+ %x228 = shufflevector <16 x float> %x227, <16 x float> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ %x229 = fsub <16 x float> %x227, %x228
+ %x230 = extractelement <16 x float> %x229, i32 0
+ ret float %x230
+}
+
OpenPOWER on IntegriCloud