summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
authorIgor Breger <igor.breger@intel.com>2015-11-15 07:23:13 +0000
committerIgor Breger <igor.breger@intel.com>2015-11-15 07:23:13 +0000
commitaa40ddd3ba7b989b2d391e48b7853a8b30f190a0 (patch)
treed23fead61aa9f0010493e34fe5cf95f9c01be590 /llvm/test
parent80321b8cc231e518445ecf1b5a7b14d997c21d09 (diff)
downloadbcm5719-llvm-aa40ddd3ba7b989b2d391e48b7853a8b30f190a0.tar.gz
bcm5719-llvm-aa40ddd3ba7b989b2d391e48b7853a8b30f190a0.zip
AVX512: Implemented encoding and intrinsics for VMOVSHDUP/VMOVSLDUP instructions.
Differential Revision: http://reviews.llvm.org/D14322 llvm-svn: 253160
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/X86/avx-isa-check.ll24
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll46
-rw-r--r--llvm/test/CodeGen/X86/avx512vl-intrinsics.ll97
-rw-r--r--llvm/test/MC/X86/avx512-encodings.s72
-rw-r--r--llvm/test/MC/X86/x86-64-avx512f_vl.s145
5 files changed, 384 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll
index d295ffd3048..e8426b67ecb 100644
--- a/llvm/test/CodeGen/X86/avx-isa-check.ll
+++ b/llvm/test/CodeGen/X86/avx-isa-check.ll
@@ -344,6 +344,30 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
ret <16 x i16> %shuffle
}
+define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
+; vmovshdup 256 test
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+ ret <8 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
+; vmovshdup 128 test
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
+ ret <4 x float> %shuffle
+}
+
+define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
+; vmovsldup 256 test
+ %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+ ret <8 x float> %shuffle
+}
+
+define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
+; vmovsldup 128 test
+ %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+ ret <4 x float> %shuffle
+}
+
define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
%a = load double, double* %ptr
%v = insertelement <2 x double> undef, double %a, i32 0
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 6ce1da55aa4..a16143f4eea 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -4676,3 +4676,49 @@ define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i6
ret <8 x i64> %res2
}
+declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovsldup %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: ## zmm1 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vmovsldup %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vmovsldup %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vmovshdup %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: ## zmm1 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vmovshdup %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vmovshdup %zmm0, %zmm0
+; CHECK-NEXT: ## zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
index f056c272135..bc51af03e4e 100644
--- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -5334,3 +5334,100 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0) {
}
declare <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float>, i32, <8 x i16>, i8) nounwind readonly
+
+declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovsldup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovsldup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = xmm0[0,0,2,2]
+; CHECK-NEXT: vmovsldup %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovsldup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovsldup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vmovsldup %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,0,2,2,4,4,6,6]
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)
+
+define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovshdup %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: ## xmm1 = xmm0[1,1,3,3]
+; CHECK-NEXT: vmovshdup %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: ## xmm2 = xmm0[1,1,3,3]
+; CHECK-NEXT: vmovshdup %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[1,1,3,3]
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vaddps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res2, %res3
+ ret <4 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovshdup %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: ## ymm1 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vmovshdup %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vmovshdup %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[1,1,3,3,5,5,7,7]
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s
index 1d7ae1c7a38..cbc8f3fd064 100644
--- a/llvm/test/MC/X86/avx512-encodings.s
+++ b/llvm/test/MC/X86/avx512-encodings.s
@@ -18297,6 +18297,78 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0xc5,0xf9,0x7e,0xaa,0xfc,0xfd,0xff,0xff]
vmovd %xmm5, -516(%rdx)
+// CHECK: vmovshdup %zmm27, %zmm16
+// CHECK: encoding: [0x62,0x81,0x7e,0x48,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16
+
+// CHECK: vmovshdup %zmm27, %zmm16 {%k4}
+// CHECK: encoding: [0x62,0x81,0x7e,0x4c,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16 {%k4}
+
+// CHECK: vmovshdup %zmm27, %zmm16 {%k4} {z}
+// CHECK: encoding: [0x62,0x81,0x7e,0xcc,0x16,0xc3]
+ vmovshdup %zmm27, %zmm16 {%k4} {z}
+
+// CHECK: vmovshdup (%rcx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x01]
+ vmovshdup (%rcx), %zmm16
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %zmm16
+// CHECK: encoding: [0x62,0xa1,0x7e,0x48,0x16,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %zmm16
+
+// CHECK: vmovshdup 8128(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x7f]
+ vmovshdup 8128(%rdx), %zmm16
+
+// CHECK: vmovshdup 8192(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0x00,0x20,0x00,0x00]
+ vmovshdup 8192(%rdx), %zmm16
+
+// CHECK: vmovshdup -8192(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x42,0x80]
+ vmovshdup -8192(%rdx), %zmm16
+
+// CHECK: vmovshdup -8256(%rdx), %zmm16
+// CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x16,0x82,0xc0,0xdf,0xff,0xff]
+ vmovshdup -8256(%rdx), %zmm16
+
+// CHECK: vmovsldup %zmm14, %zmm13
+// CHECK: encoding: [0x62,0x51,0x7e,0x48,0x12,0xee]
+ vmovsldup %zmm14, %zmm13
+
+// CHECK: vmovsldup %zmm14, %zmm13 {%k6}
+// CHECK: encoding: [0x62,0x51,0x7e,0x4e,0x12,0xee]
+ vmovsldup %zmm14, %zmm13 {%k6}
+
+// CHECK: vmovsldup %zmm14, %zmm13 {%k6} {z}
+// CHECK: encoding: [0x62,0x51,0x7e,0xce,0x12,0xee]
+ vmovsldup %zmm14, %zmm13 {%k6} {z}
+
+// CHECK: vmovsldup (%rcx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x29]
+ vmovsldup (%rcx), %zmm13
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %zmm13
+// CHECK: encoding: [0x62,0x31,0x7e,0x48,0x12,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %zmm13
+
+// CHECK: vmovsldup 8128(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x7f]
+ vmovsldup 8128(%rdx), %zmm13
+
+// CHECK: vmovsldup 8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0x00,0x20,0x00,0x00]
+ vmovsldup 8192(%rdx), %zmm13
+
+// CHECK: vmovsldup -8192(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0x6a,0x80]
+ vmovsldup -8192(%rdx), %zmm13
+
+// CHECK: vmovsldup -8256(%rdx), %zmm13
+// CHECK: encoding: [0x62,0x71,0x7e,0x48,0x12,0xaa,0xc0,0xdf,0xff,0xff]
+ vmovsldup -8256(%rdx), %zmm13
+
// CHECK: vmovlps (%rcx), %xmm20, %xmm7
// CHECK: encoding: [0x62,0xf1,0x5c,0x00,0x12,0x39]
vmovlps (%rcx), %xmm20, %xmm7
diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s
index 17645fe3f82..a7a28ac9b7f 100644
--- a/llvm/test/MC/X86/x86-64-avx512f_vl.s
+++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s
@@ -21978,3 +21978,148 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: vcvtps2ph $123, %ymm30, -2064(%rdx)
// CHECK: encoding: [0x62,0x63,0x7d,0x28,0x1d,0xb2,0xf0,0xf7,0xff,0xff,0x7b]
vcvtps2ph $0x7b, %ymm30, -2064(%rdx)
+
+// CHECK: vmovshdup %xmm18, %xmm23
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23
+
+// CHECK: vmovshdup %xmm18, %xmm23 {%k2}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x0a,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23 {%k2}
+
+// CHECK: vmovshdup %xmm18, %xmm23 {%k2} {z}
+// CHECK: encoding: [0x62,0xa1,0x7e,0x8a,0x16,0xfa]
+ vmovshdup %xmm18, %xmm23 {%k2} {z}
+
+// CHECK: vmovshdup (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x39]
+ vmovshdup (%rcx), %xmm23
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x16,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %xmm23
+
+// CHECK: vmovshdup 2032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x7f]
+ vmovshdup 2032(%rdx), %xmm23
+
+// CHECK: vmovshdup 2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0x00,0x08,0x00,0x00]
+ vmovshdup 2048(%rdx), %xmm23
+
+// CHECK: vmovshdup -2048(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0x7a,0x80]
+ vmovshdup -2048(%rdx), %xmm23
+
+// CHECK: vmovshdup -2064(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe1,0x7e,0x08,0x16,0xba,0xf0,0xf7,0xff,0xff]
+ vmovshdup -2064(%rdx), %xmm23
+
+// CHECK: vmovshdup %ymm24, %ymm18
+// CHECK: encoding: [0x62,0x81,0x7e,0x28,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18
+
+// CHECK: vmovshdup %ymm24, %ymm18 {%k3}
+// CHECK: encoding: [0x62,0x81,0x7e,0x2b,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18 {%k3}
+
+// CHECK: vmovshdup %ymm24, %ymm18 {%k3} {z}
+// CHECK: encoding: [0x62,0x81,0x7e,0xab,0x16,0xd0]
+ vmovshdup %ymm24, %ymm18 {%k3} {z}
+
+// CHECK: vmovshdup (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x11]
+ vmovshdup (%rcx), %ymm18
+
+// CHECK: vmovshdup 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa1,0x7e,0x28,0x16,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vmovshdup 291(%rax,%r14,8), %ymm18
+
+// CHECK: vmovshdup 4064(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x7f]
+ vmovshdup 4064(%rdx), %ymm18
+
+// CHECK: vmovshdup 4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0x00,0x10,0x00,0x00]
+ vmovshdup 4096(%rdx), %ymm18
+
+// CHECK: vmovshdup -4096(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x52,0x80]
+ vmovshdup -4096(%rdx), %ymm18
+
+// CHECK: vmovshdup -4128(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe1,0x7e,0x28,0x16,0x92,0xe0,0xef,0xff,0xff]
+ vmovshdup -4128(%rdx), %ymm18
+
+// CHECK: vmovsldup %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25
+
+// CHECK: vmovsldup %xmm21, %xmm25 {%k5}
+// CHECK: encoding: [0x62,0x21,0x7e,0x0d,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25 {%k5}
+
+// CHECK: vmovsldup %xmm21, %xmm25 {%k5} {z}
+// CHECK: encoding: [0x62,0x21,0x7e,0x8d,0x12,0xcd]
+ vmovsldup %xmm21, %xmm25 {%k5} {z}
+
+// CHECK: vmovsldup (%rcx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x09]
+ vmovsldup (%rcx), %xmm25
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %xmm25
+// CHECK: encoding: [0x62,0x21,0x7e,0x08,0x12,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %xmm25
+
+// CHECK: vmovsldup 2032(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x7f]
+ vmovsldup 2032(%rdx), %xmm25
+
+// CHECK: vmovsldup 2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0x00,0x08,0x00,0x00]
+ vmovsldup 2048(%rdx), %xmm25
+
+// CHECK: vmovsldup -2048(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x4a,0x80]
+ vmovsldup -2048(%rdx), %xmm25
+
+// CHECK: vmovsldup -2064(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x61,0x7e,0x08,0x12,0x8a,0xf0,0xf7,0xff,0xff]
+ vmovsldup -2064(%rdx), %xmm25
+
+// CHECK: vmovsldup %ymm29, %ymm24
+// CHECK: encoding: [0x62,0x01,0x7e,0x28,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24
+
+// CHECK: vmovsldup %ymm29, %ymm24 {%k5}
+// CHECK: encoding: [0x62,0x01,0x7e,0x2d,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24 {%k5}
+
+// CHECK: vmovsldup %ymm29, %ymm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x01,0x7e,0xad,0x12,0xc5]
+ vmovsldup %ymm29, %ymm24 {%k5} {z}
+
+// CHECK: vmovsldup (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x01]
+ vmovsldup (%rcx), %ymm24
+
+// CHECK: vmovsldup 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x21,0x7e,0x28,0x12,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vmovsldup 291(%rax,%r14,8), %ymm24
+
+// CHECK: vmovsldup 4064(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x7f]
+ vmovsldup 4064(%rdx), %ymm24
+
+// CHECK: vmovsldup 4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0x00,0x10,0x00,0x00]
+ vmovsldup 4096(%rdx), %ymm24
+
+// CHECK: vmovsldup -4096(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x42,0x80]
+ vmovsldup -4096(%rdx), %ymm24
+
+// CHECK: vmovsldup -4128(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x61,0x7e,0x28,0x12,0x82,0xe0,0xef,0xff,0xff]
+ vmovsldup -4128(%rdx), %ymm24
+
OpenPOWER on IntegriCloud