summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorIgor Breger <igor.breger@intel.com>2015-06-08 14:03:17 +0000
committerIgor Breger <igor.breger@intel.com>2015-06-08 14:03:17 +0000
commit00d9f8457bf49884c441492fe807b5409de98c8a (patch)
tree53a5e6c9cd8db757f4b95c6271f02af2550761c6 /llvm/test
parent7fad7e57e8137a2e6c6d8c21fe92d436a053b2f1 (diff)
downloadbcm5719-llvm-00d9f8457bf49884c441492fe807b5409de98c8a.tar.gz
bcm5719-llvm-00d9f8457bf49884c441492fe807b5409de98c8a.zip
AVX-512: Implemented 256/128bit VALIGND/Q instructions for SKX and KNL
Implemented DAG lowering for all these forms. Added tests for DAG lowering and encoding. Differential Revision: http://reviews.llvm.org/D10310 llvm-svn: 239300
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/avx512-shuffle.ll84
-rw-r--r--llvm/test/MC/X86/avx512-encodings.s60
-rw-r--r--llvm/test/MC/X86/x86-64-avx512f_vl.s119
3 files changed, 263 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffle.ll b/llvm/test/CodeGen/X86/avx512-shuffle.ll
index 8411fee1502..2683d6fe238 100644
--- a/llvm/test/CodeGen/X86/avx512-shuffle.ll
+++ b/llvm/test/CodeGen/X86/avx512-shuffle.ll
@@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK-SKX
; CHECK-LABEL: test1:
; CHECK: vpermps
@@ -250,3 +251,86 @@ define <8 x double> @test32(<8 x double> %a, <8 x double> %b) nounwind {
%c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 1, i32 10, i32 2, i32 undef, i32 5, i32 15, i32 undef>
ret <8 x double> %c
}
+
+define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
+; CHECK-LABEL: test_align_v16i32_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignd $3, %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i32> %c
+}
+
+define <16 x i32> @test_align_v16i32_rm(<16 x i32>* %a.ptr, <16 x i32> %b) nounwind {
+; CHECK-LABEL: test_align_v16i32_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignd $3, (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %a = load <16 x i32>, <16 x i32>* %a.ptr
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ ret <16 x i32> %c
+}
+
+define <16 x i32> @test_align_v16i32_rm_mask(<16 x i32>* %a.ptr, <16 x i32> %b, <16 x i1> %mask) nounwind {
+; CHECK-LABEL: test_align_v16i32_rm_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxbd %xmm1, %zmm1
+; CHECK-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmd %zmm1, %zmm1, %k1
+; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
+; CHECK-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-NEXT: retq
+;
+; CHECK-SKX-LABEL: test_align_v16i32_rm_mask:
+; CHECK-SKX: ## BB#0:
+; CHECK-SKX-NEXT: vpmovb2m %xmm1, %k1
+; CHECK-SKX-NEXT: vmovdqa32 (%rdi), %zmm1
+; CHECK-SKX-NEXT: valignd $3, %zmm1, %zmm0, %zmm1 {%k1}
+; CHECK-SKX-NEXT: vmovaps %zmm1, %zmm0
+; CHECK-SKX-NEXT: retq
+ %a = load <16 x i32>, <16 x i32>* %a.ptr
+ %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 3, i32 4, i32 5, i32 undef, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
+ %res = select <16 x i1> %mask,<16 x i32> %c, <16 x i32> %a
+ ret <16 x i32> %res
+}
+
+define <8 x double> @test_align_v8f64_rr(<8 x double> %a, <8 x double> %b) nounwind {
+; CHECK-LABEL: test_align_v8f64_rr:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignq $3, %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x double> %c
+}
+
+define <8 x double> @test_align_v18f64_rm(<8 x double>* %a.ptr, <8 x double> %b) nounwind {
+; CHECK-LABEL: test_align_v18f64_rm:
+; CHECK: ## BB#0:
+; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %a = load <8 x double>, <8 x double>* %a.ptr
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ ret <8 x double> %c
+}
+
+define <8 x double> @test_align_v18f64_rm_mask(<8 x double>* %a.ptr, <8 x double> %b, <8 x i1> %mask) nounwind {
+; CHECK-LABEL: test_align_v18f64_rm_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm1, %zmm1
+; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+;
+; CHECK-SKX-LABEL: test_align_v18f64_rm_mask:
+; CHECK-SKX: ## BB#0:
+; CHECK-SKX-NEXT: vpmovw2m %xmm1, %k1
+; CHECK-SKX-NEXT: valignq $3, (%rdi), %zmm0, %zmm0 {%k1} {z}
+; CHECK-SKX-NEXT: retq
+ %a = load <8 x double>, <8 x double>* %a.ptr
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
+ %res = select <8 x i1> %mask,<8 x double> %c, <8 x double> zeroinitializer
+ ret <8 x double> %res
+}
+
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index ba467afe153..ca0fccb2e3e 100644
--- a/llvm/test/MC/X86/avx512-encodings.s
+++ b/llvm/test/MC/X86/avx512-encodings.s
@@ -6084,6 +6084,66 @@ valignq $2, 0x100(%rsp), %zmm0, %zmm1
// CHECK: encoding: [0x62,0xf3,0xfd,0x49,0x03,0xcb,0x03]
valignq $3, %zmm3, %zmm0, %zmm1 {%k1}
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28
+
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28 {%k3}
+// CHECK: encoding: [0x62,0x23,0xdd,0x4b,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28 {%k3}
+
+// CHECK: valignq $171, %zmm23, %zmm4, %zmm28 {%k3} {z}
+// CHECK: encoding: [0x62,0x23,0xdd,0xcb,0x03,0xe7,0xab]
+ valignq $0xab, %zmm23, %zmm4, %zmm28 {%k3} {z}
+
+// CHECK: valignq $123, %zmm23, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xe7,0x7b]
+ valignq $0x7b, %zmm23, %zmm4, %zmm28
+
+// CHECK: valignq $123, (%rcx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x21,0x7b]
+ valignq $0x7b, (%rcx), %zmm4, %zmm28
+
+// CHECK: valignq $123, 291(%rax,%r14,8), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x23,0xdd,0x48,0x03,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %zmm4, %zmm28
+
+// CHECK: valignq $123, (%rcx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x21,0x7b]
+ valignq $0x7b, (%rcx){1to8}, %zmm4, %zmm28
+
+// CHECK: valignq $123, 8128(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x62,0x7f,0x7b]
+ valignq $0x7b, 8128(%rdx), %zmm4, %zmm28
+
+// CHECK: valignq $123, 8192(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0xa2,0x00,0x20,0x00,0x00,0x7b]
+ valignq $0x7b, 8192(%rdx), %zmm4, %zmm28
+
+// CHECK: valignq $123, -8192(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0x62,0x80,0x7b]
+ valignq $0x7b, -8192(%rdx), %zmm4, %zmm28
+
+// CHECK: valignq $123, -8256(%rdx), %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x48,0x03,0xa2,0xc0,0xdf,0xff,0xff,0x7b]
+ valignq $0x7b, -8256(%rdx), %zmm4, %zmm28
+
+// CHECK: valignq $123, 1016(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x62,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to8}, %zmm4, %zmm28
+
+// CHECK: valignq $123, 1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0xa2,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to8}, %zmm4, %zmm28
+
+// CHECK: valignq $123, -1024(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0x62,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to8}, %zmm4, %zmm28
+
+// CHECK: valignq $123, -1032(%rdx){1to8}, %zmm4, %zmm28
+// CHECK: encoding: [0x62,0x63,0xdd,0x58,0x03,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to8}, %zmm4, %zmm28
+
// CHECK: vextractf32x4 $3
// CHECK: encoding: [0x62,0xf3,0x7d,0x49,0x19,0xd9,0x03]
vextractf32x4 $3, %zmm3, %xmm1 {%k1}
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index 983e87912ed..f521b3e42d4 100644
--- a/llvm/test/MC/X86/x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s
@@ -11013,3 +11013,122 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0x63,0xad,0x30,0x43,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
vshufi64x2 $0x7b, -1032(%rdx){1to4}, %ymm26, %ymm25
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0x83,0xed,0x00,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19
+
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19 {%k5}
+// CHECK: encoding: [0x62,0x83,0xed,0x05,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19 {%k5}
+
+// CHECK: valignq $171, %xmm24, %xmm18, %xmm19 {%k5} {z}
+// CHECK: encoding: [0x62,0x83,0xed,0x85,0x03,0xd8,0xab]
+ valignq $0xab, %xmm24, %xmm18, %xmm19 {%k5} {z}
+
+// CHECK: valignq $123, %xmm24, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0x83,0xed,0x00,0x03,0xd8,0x7b]
+ valignq $0x7b, %xmm24, %xmm18, %xmm19
+
+// CHECK: valignq $123, (%rcx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x19,0x7b]
+ valignq $0x7b, (%rcx), %xmm18, %xmm19
+
+// CHECK: valignq $123, 291(%rax,%r14,8), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xa3,0xed,0x00,0x03,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %xmm18, %xmm19
+
+// CHECK: valignq $123, (%rcx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x19,0x7b]
+ valignq $0x7b, (%rcx){1to2}, %xmm18, %xmm19
+
+// CHECK: valignq $123, 2032(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x5a,0x7f,0x7b]
+ valignq $0x7b, 2032(%rdx), %xmm18, %xmm19
+
+// CHECK: valignq $123, 2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ valignq $0x7b, 2048(%rdx), %xmm18, %xmm19
+
+// CHECK: valignq $123, -2048(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x5a,0x80,0x7b]
+ valignq $0x7b, -2048(%rdx), %xmm18, %xmm19
+
+// CHECK: valignq $123, -2064(%rdx), %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x00,0x03,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ valignq $0x7b, -2064(%rdx), %xmm18, %xmm19
+
+// CHECK: valignq $123, 1016(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x5a,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: valignq $123, 1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x9a,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: valignq $123, -1024(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x5a,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: valignq $123, -1032(%rdx){1to2}, %xmm18, %xmm19
+// CHECK: encoding: [0x62,0xe3,0xed,0x10,0x03,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to2}, %xmm18, %xmm19
+
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25
+
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25 {%k2}
+// CHECK: encoding: [0x62,0x03,0xbd,0x22,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25 {%k2}
+
+// CHECK: valignq $171, %ymm26, %ymm24, %ymm25 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0xbd,0xa2,0x03,0xca,0xab]
+ valignq $0xab, %ymm26, %ymm24, %ymm25 {%k2} {z}
+
+// CHECK: valignq $123, %ymm26, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x03,0xca,0x7b]
+ valignq $0x7b, %ymm26, %ymm24, %ymm25
+
+// CHECK: valignq $123, (%rcx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x09,0x7b]
+ valignq $0x7b, (%rcx), %ymm24, %ymm25
+
+// CHECK: valignq $123, 291(%rax,%r14,8), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x23,0xbd,0x20,0x03,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ valignq $0x7b, 291(%rax,%r14,8), %ymm24, %ymm25
+
+// CHECK: valignq $123, (%rcx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x09,0x7b]
+ valignq $0x7b, (%rcx){1to4}, %ymm24, %ymm25
+
+// CHECK: valignq $123, 4064(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x4a,0x7f,0x7b]
+ valignq $0x7b, 4064(%rdx), %ymm24, %ymm25
+
+// CHECK: valignq $123, 4096(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ valignq $0x7b, 4096(%rdx), %ymm24, %ymm25
+
+// CHECK: valignq $123, -4096(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x4a,0x80,0x7b]
+ valignq $0x7b, -4096(%rdx), %ymm24, %ymm25
+
+// CHECK: valignq $123, -4128(%rdx), %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x03,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ valignq $0x7b, -4128(%rdx), %ymm24, %ymm25
+
+// CHECK: valignq $123, 1016(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x4a,0x7f,0x7b]
+ valignq $0x7b, 1016(%rdx){1to4}, %ymm24, %ymm25
+
+// CHECK: valignq $123, 1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0x00,0x04,0x00,0x00,0x7b]
+ valignq $0x7b, 1024(%rdx){1to4}, %ymm24, %ymm25
+
+// CHECK: valignq $123, -1024(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x4a,0x80,0x7b]
+ valignq $0x7b, -1024(%rdx){1to4}, %ymm24, %ymm25
+
+// CHECK: valignq $123, -1032(%rdx){1to4}, %ymm24, %ymm25
+// CHECK: encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b]
+ valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25
OpenPOWER on IntegriCloud