diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 183 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 22 | ||||
| -rw-r--r-- | llvm/test/MC/X86/avx512-encodings.s | 241 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512bw.s | 72 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512bw_vl.s | 143 | ||||
| -rw-r--r-- | llvm/test/MC/X86/x86-64-avx512f_vl.s | 896 | 
6 files changed, 1412 insertions, 145 deletions
| diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 68c18fd5ca7..028553ccd27 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1058,118 +1058,87 @@ def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))),            (VPERMILPDZri VR512:$src1, imm:$imm)>;  // -- VPERM2I - 3 source operands form -- -multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC, -                          PatFrag mem_frag, X86MemOperand x86memop, -                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> { +multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, +                            SDNode OpNode, X86VectorVTInfo _> {  let Constraints = "$src1 = $dst" in { -  def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), -                   (ins RC:$src1, RC:$src2, RC:$src3), -                   !strconcat(OpcodeStr, -                       "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), -                   [(set RC:$dst, -                     (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, -                    EVEX_4V; - -  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), -                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), -                   !strconcat(OpcodeStr, -                       "\t{$src3, $src2, $dst {${mask}}|" -                       "$dst {${mask}}, $src2, $src3}"), -                   [(set RC:$dst, (OpVT (vselect KRC:$mask, -                                           (OpNode RC:$src1, RC:$src2, -                                              RC:$src3), -                                           RC:$src1)))]>, -                    EVEX_4V, EVEX_K; - -  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> -    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), -                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), -                   !strconcat(OpcodeStr, -                       "\t{$src3, $src2, $dst {${mask}} {z} |", -                       "$dst {${mask}} {z}, $src2, $src3}"), -                   [(set RC:$dst, (OpVT (vselect KRC:$mask, -                                           (OpNode RC:$src1, RC:$src2, -                                              RC:$src3), -                                           (OpVT (bitconvert -                                              (v16i32 immAllZerosV))))))]>, -                    EVEX_4V, EVEX_KZ; +  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst), +          (ins _.RC:$src2, _.RC:$src3), +          OpcodeStr, "$src3, $src2", "$src2, $src3", +          (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, +         AVX5128IBase; -  def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), -                   (ins RC:$src1, RC:$src2, x86memop:$src3), -                   !strconcat(OpcodeStr, -                    "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), -                   [(set RC:$dst, -                     (OpVT (OpNode RC:$src1, RC:$src2, -                      (mem_frag addr:$src3))))]>, EVEX_4V; +  let mayLoad = 1 in +  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), +            (ins _.RC:$src2, _.MemOp:$src3), +            OpcodeStr, "$src3, $src2", "$src2, $src3", +            (_.VT (OpNode _.RC:$src1, _.RC:$src2, +                   (_.VT (bitconvert (_.LdFrag addr:$src3)))))>, +            EVEX_4V, AVX5128IBase; +  } +} +multiclass avx512_perm_3src_mb<bits<8> opc, string OpcodeStr, +                               SDNode OpNode, X86VectorVTInfo _> { +  let mayLoad = 1, Constraints = "$src1 = $dst" in +  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst), +              (ins _.RC:$src2, _.ScalarMemOp:$src3), +              OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"), +              !strconcat("$src2, ${src3}", _.BroadcastStr ), +              (_.VT (OpNode _.RC:$src1, +               _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,  +              AVX5128IBase, EVEX_4V, EVEX_B; +} -  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), -                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), -                   !strconcat(OpcodeStr, -                    "\t{$src3, $src2, $dst {${mask}}|" -                    "$dst {${mask}}, $src2, $src3}"), -                   [(set RC:$dst, -                       (OpVT (vselect KRC:$mask, -                                      (OpNode RC:$src1, RC:$src2, -                                         (mem_frag addr:$src3)), -                                      RC:$src1)))]>, -                    EVEX_4V, EVEX_K; - -  let AddedComplexity = 10 in // Prefer over the rrkz variant -    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), -                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), -                   !strconcat(OpcodeStr, -                    "\t{$src3, $src2, $dst {${mask}} {z}|" -                    "$dst {${mask}} {z}, $src2, $src3}"), -                   [(set RC:$dst, -                     (OpVT (vselect KRC:$mask, -                                    (OpNode RC:$src1, RC:$src2, -                                            (mem_frag addr:$src3)), -                                    (OpVT (bitconvert -                                       (v16i32 immAllZerosV))))))]>, -                    EVEX_4V, EVEX_KZ; +multiclass avx512_perm_3src_sizes<bits<8> opc, string OpcodeStr, +                                  SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { +  let Predicates = [HasAVX512] in +  defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,  +            avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, EVEX_V512; +  let Predicates = [HasVLX] in { +  defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,  +                 avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>, +                 EVEX_V128; +  defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,  +                 avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>, +                 EVEX_V256; +  } +} +multiclass avx512_perm_3src_sizes_w<bits<8> opc, string OpcodeStr,  +                                   SDNode OpNode, AVX512VLVectorVTInfo VTInfo> { +  let Predicates = [HasBWI] in +  defm NAME: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info512>,  +             avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info512>, +             EVEX_V512; +  let Predicates = [HasBWI, HasVLX] in { +  defm NAME#128: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info128>,  +                 avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info128>, +                 EVEX_V128; +  defm NAME#256: avx512_perm_3src<opc, OpcodeStr, OpNode, VTInfo.info256>,  +                 avx512_perm_3src_mb<opc, OpcodeStr, OpNode, VTInfo.info256>, +                 EVEX_V256;    }  } -defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, loadv16i32, -                                  i512mem, X86VPermiv3, v16i32, VK16WM>, -                 EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, loadv8i64, -                                  i512mem, X86VPermiv3, v8i64, VK8WM>, -                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, loadv16f32, -                                  i512mem, X86VPermiv3, v16f32, VK16WM>, -                 EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, loadv8f64, -                                  i512mem, X86VPermiv3, v8f64, VK8WM>, -                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC, -                          PatFrag mem_frag, X86MemOperand x86memop, -                          SDNode OpNode, ValueType OpVT, RegisterClass KRC, -                          ValueType MaskVT, RegisterClass MRC> : -        avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode, -                         OpVT, KRC> { -  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") -                     VR512:$idx, VR512:$src1, VR512:$src2, -1)), -            (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; - -  def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") -                     VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), -            (!cast<Instruction>(NAME#rrk) VR512:$src1, -              (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; -} - -defm VPERMT2D  : avx512_perm_table_3src<0x7E, "d",  VR512, loadv16i32, i512mem, -                               X86VPermv3, v16i32, VK16WM, v16i1, GR16>, -                 EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q  : avx512_perm_table_3src<0x7E, "q",  VR512, loadv8i64, i512mem, -                               X86VPermv3, v8i64, VK8WM, v8i1, GR8>, -                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps",  VR512, loadv16f32, i512mem, -                               X86VPermv3, v16f32, VK16WM, v16i1, GR16>, -                 EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd",  VR512, loadv8f64, i512mem, -                               X86VPermv3, v8f64, VK8WM, v8i1, GR8>, -                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D  : avx512_perm_3src_sizes<0x76, "vpermi2d", X86VPermiv3, +                                  avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q  : avx512_perm_3src_sizes<0x76, "vpermi2q", X86VPermiv3, +                                  avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src_sizes<0x77, "vpermi2ps", X86VPermiv3, +                                  avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src_sizes<0x77, "vpermi2pd", X86VPermiv3, +                                  avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2D  : avx512_perm_3src_sizes<0x7E, "vpermt2d", X86VPermv3, +                                  avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q  : avx512_perm_3src_sizes<0x7E, "vpermt2q", X86VPermv3, +                                  avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2PS : avx512_perm_3src_sizes<0x7F, "vpermt2ps", X86VPermv3, +                                  avx512vl_f32_info>, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_3src_sizes<0x7F, "vpermt2pd", X86VPermv3, +                                  avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>; + +defm VPERMT2W  : avx512_perm_3src_sizes_w<0x7D, "vpermt2w", X86VPermv3, +                                  avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>; +defm VPERMI2W  : avx512_perm_3src_sizes_w<0x75, "vpermi2w", X86VPermiv3, +                                  avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;  //===----------------------------------------------------------------------===//  // AVX-512 - BLEND using mask diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 544b3a0b0cb..a06cadaa3f5 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -599,28 +599,6 @@ define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask    ret <8 x double> %res  } -define <16 x float> @test_vpermt2ps(<16 x float>%x, <16 x float>%y, <16 x i32>%perm) { -; CHECK: vpermt2ps {{.*}}encoding: [0x62,0xf2,0x6d,0x48,0x7f,0xc1] -  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 -1) -  ret <16 x float> %res -} - -define <16 x float> @test_vpermt2ps_mask(<16 x float>%x, <16 x float>%y, <16 x i32>%perm, i16 %mask) { -; CHECK-LABEL: test_vpermt2ps_mask: -; CHECK: vpermt2ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x7f,0xc1] -  %res = call <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>%perm, <16 x float>%x, <16 x float>%y, i16 %mask) -  ret <16 x float> %res -} - -declare <16 x float> @llvm.x86.avx512.mask.vpermt.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) - -define <8 x i64> @test_vmovntdqa(i8 *%x) { -; CHECK-LABEL: test_vmovntdqa: -; CHECK: vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07] -  %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %x) -  ret <8 x i64> %res -} -  declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)  define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) { diff --git a/llvm/test/MC/X86/avx512-encodings.s b/llvm/test/MC/X86/avx512-encodings.s index 83789c78d6e..e52dfac1976 100644 --- a/llvm/test/MC/X86/avx512-encodings.s +++ b/llvm/test/MC/X86/avx512-encodings.s @@ -6060,22 +6060,6 @@ vpcmpd $1, %zmm24, %zmm7, %k5{%k4}  // CHECK: encoding: [0x62,0xf3,0xf5,0x47,0x1e,0x72,0x01,0x02]  vpcmpuq $2, 0x40(%rdx), %zmm17, %k6{%k7} -// CHECK: vpermi2d -// CHECK: encoding: [0x62,0x42,0x6d,0x4b,0x76,0xd6] -vpermi2d %zmm14, %zmm2, %zmm26 {%k3} - -// CHECK: vpermt2pd -// CHECK: encoding: [0x62,0xf2,0xcd,0xc6,0x7f,0xf3] -vpermt2pd %zmm3, %zmm22, %zmm6 {%k6} {z} - -// CHECK: vpermi2q -// CHECK: encoding: [0x62,0x62,0xed,0x4b,0x76,0x54,0x58,0x02] -vpermi2q 0x80(%rax,%rbx,2), %zmm2, %zmm26 {%k3} - -// CHECK: vpermt2d -// CHECK: encoding: [0x62,0x32,0x4d,0xc2,0x7e,0x24,0xad,0x05,0x00,0x00,0x00]	 -vpermt2d 5(,%r13,4), %zmm22, %zmm12 {%k2} {z} -  // CHECK: valignq $2  // CHECK: encoding: [0x62,0xf3,0xfd,0x48,0x03,0x4c,0x24,0x04,0x02]  valignq  $2, 0x100(%rsp), %zmm0, %zmm1 @@ -9305,3 +9289,228 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2  // CHECK: vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14  // CHECK:  encoding: [0x62,0x71,0xce,0x00,0x7b,0xb2,0xf8,0xfb,0xff,0xff]            vcvtusi2ssq -1032(%rdx), %xmm22, %xmm14 + +// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0xd4] +          vpermi2d %zmm4, %zmm28, %zmm10 + +// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5} +// CHECK:  encoding: [0x62,0x72,0x1d,0x45,0x76,0xd4] +          vpermi2d %zmm4, %zmm28, %zmm10 {%k5} + +// CHECK: vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z} +// CHECK:  encoding: [0x62,0x72,0x1d,0xc5,0x76,0xd4] +          vpermi2d %zmm4, %zmm28, %zmm10 {%k5} {z} + +// CHECK: vpermi2d (%rcx), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x11] +          vpermi2d (%rcx), %zmm28, %zmm10 + +// CHECK: vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x32,0x1d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2d 291(%rax,%r14,8), %zmm28, %zmm10 + +// CHECK: vpermi2d (%rcx){1to16}, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x11] +          vpermi2d (%rcx){1to16}, %zmm28, %zmm10 + +// CHECK: vpermi2d 8128(%rdx), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x7f] +          vpermi2d 8128(%rdx), %zmm28, %zmm10 + +// CHECK: vpermi2d 8192(%rdx), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0x00,0x20,0x00,0x00] +          vpermi2d 8192(%rdx), %zmm28, %zmm10 + +// CHECK: vpermi2d -8192(%rdx), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x52,0x80] +          vpermi2d -8192(%rdx), %zmm28, %zmm10 + +// CHECK: vpermi2d -8256(%rdx), %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff] +          vpermi2d -8256(%rdx), %zmm28, %zmm10 + +// CHECK: vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x7f] +          vpermi2d 508(%rdx){1to16}, %zmm28, %zmm10 + +// CHECK: vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0x00,0x02,0x00,0x00] +          vpermi2d 512(%rdx){1to16}, %zmm28, %zmm10 + +// CHECK: vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x52,0x80] +          vpermi2d -512(%rdx){1to16}, %zmm28, %zmm10 + +// CHECK: vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10 +// CHECK:  encoding: [0x62,0x72,0x1d,0x50,0x76,0x92,0xfc,0xfd,0xff,0xff] +          vpermi2d -516(%rdx){1to16}, %zmm28, %zmm10 + +// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0x82,0x9d,0x40,0x76,0xd4] +          vpermi2q %zmm28, %zmm28, %zmm18 + +// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2} +// CHECK:  encoding: [0x62,0x82,0x9d,0x42,0x76,0xd4] +          vpermi2q %zmm28, %zmm28, %zmm18 {%k2} + +// CHECK: vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z} +// CHECK:  encoding: [0x62,0x82,0x9d,0xc2,0x76,0xd4] +          vpermi2q %zmm28, %zmm28, %zmm18 {%k2} {z} + +// CHECK: vpermi2q (%rcx), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x11] +          vpermi2q (%rcx), %zmm28, %zmm18 + +// CHECK: vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xa2,0x9d,0x40,0x76,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2q 291(%rax,%r14,8), %zmm28, %zmm18 + +// CHECK: vpermi2q (%rcx){1to8}, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x11] +          vpermi2q (%rcx){1to8}, %zmm28, %zmm18 + +// CHECK: vpermi2q 8128(%rdx), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x7f] +          vpermi2q 8128(%rdx), %zmm28, %zmm18 + +// CHECK: vpermi2q 8192(%rdx), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0x00,0x20,0x00,0x00] +          vpermi2q 8192(%rdx), %zmm28, %zmm18 + +// CHECK: vpermi2q -8192(%rdx), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x52,0x80] +          vpermi2q -8192(%rdx), %zmm28, %zmm18 + +// CHECK: vpermi2q -8256(%rdx), %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x40,0x76,0x92,0xc0,0xdf,0xff,0xff] +          vpermi2q -8256(%rdx), %zmm28, %zmm18 + +// CHECK: vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x7f] +          vpermi2q 1016(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0x00,0x04,0x00,0x00] +          vpermi2q 1024(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x52,0x80] +          vpermi2q -1024(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x50,0x76,0x92,0xf8,0xfb,0xff,0xff] +          vpermi2q -1032(%rdx){1to8}, %zmm28, %zmm18 + +// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x42,0x45,0x40,0x77,0xc0] +          vpermi2ps %zmm8, %zmm23, %zmm24 + +// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} +// CHECK:  encoding: [0x62,0x42,0x45,0x42,0x77,0xc0] +          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} + +// CHECK: vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z} +// CHECK:  encoding: [0x62,0x42,0x45,0xc2,0x77,0xc0] +          vpermi2ps %zmm8, %zmm23, %zmm24 {%k2} {z} + +// CHECK: vpermi2ps (%rcx), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x01] +          vpermi2ps (%rcx), %zmm23, %zmm24 + +// CHECK: vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x22,0x45,0x40,0x77,0x84,0xf0,0x23,0x01,0x00,0x00] +          vpermi2ps 291(%rax,%r14,8), %zmm23, %zmm24 + +// CHECK: vpermi2ps (%rcx){1to16}, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x01] +          vpermi2ps (%rcx){1to16}, %zmm23, %zmm24 + +// CHECK: vpermi2ps 8128(%rdx), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x7f] +          vpermi2ps 8128(%rdx), %zmm23, %zmm24 + +// CHECK: vpermi2ps 8192(%rdx), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0x00,0x20,0x00,0x00] +          vpermi2ps 8192(%rdx), %zmm23, %zmm24 + +// CHECK: vpermi2ps -8192(%rdx), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x42,0x80] +          vpermi2ps -8192(%rdx), %zmm23, %zmm24 + +// CHECK: vpermi2ps -8256(%rdx), %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x40,0x77,0x82,0xc0,0xdf,0xff,0xff] +          vpermi2ps -8256(%rdx), %zmm23, %zmm24 + +// CHECK: vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x7f] +          vpermi2ps 508(%rdx){1to16}, %zmm23, %zmm24 + +// CHECK: vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0x00,0x02,0x00,0x00] +          vpermi2ps 512(%rdx){1to16}, %zmm23, %zmm24 + +// CHECK: vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x42,0x80] +          vpermi2ps -512(%rdx){1to16}, %zmm23, %zmm24 + +// CHECK: vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24 +// CHECK:  encoding: [0x62,0x62,0x45,0x50,0x77,0x82,0xfc,0xfd,0xff,0xff] +          vpermi2ps -516(%rdx){1to16}, %zmm23, %zmm24 + +// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xe4] +          vpermi2pd %zmm20, %zmm5, %zmm20 + +// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} +// CHECK:  encoding: [0x62,0xa2,0xd5,0x4b,0x77,0xe4] +          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} + +// CHECK: vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z} +// CHECK:  encoding: [0x62,0xa2,0xd5,0xcb,0x77,0xe4] +          vpermi2pd %zmm20, %zmm5, %zmm20 {%k3} {z} + +// CHECK: vpermi2pd (%rcx), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x21] +          vpermi2pd (%rcx), %zmm5, %zmm20 + +// CHECK: vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xa2,0xd5,0x48,0x77,0xa4,0xf0,0x23,0x01,0x00,0x00] +          vpermi2pd 291(%rax,%r14,8), %zmm5, %zmm20 + +// CHECK: vpermi2pd (%rcx){1to8}, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x21] +          vpermi2pd (%rcx){1to8}, %zmm5, %zmm20 + +// CHECK: vpermi2pd 8128(%rdx), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x7f] +          vpermi2pd 8128(%rdx), %zmm5, %zmm20 + +// CHECK: vpermi2pd 8192(%rdx), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0x00,0x20,0x00,0x00] +          vpermi2pd 8192(%rdx), %zmm5, %zmm20 + +// CHECK: vpermi2pd -8192(%rdx), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0x62,0x80] +          vpermi2pd -8192(%rdx), %zmm5, %zmm20 + +// CHECK: vpermi2pd -8256(%rdx), %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x48,0x77,0xa2,0xc0,0xdf,0xff,0xff] +          vpermi2pd -8256(%rdx), %zmm5, %zmm20 + +// CHECK: vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x7f] +          vpermi2pd 1016(%rdx){1to8}, %zmm5, %zmm20 + +// CHECK: vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0x00,0x04,0x00,0x00] +          vpermi2pd 1024(%rdx){1to8}, %zmm5, %zmm20 + +// CHECK: vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0x62,0x80] +          vpermi2pd -1024(%rdx){1to8}, %zmm5, %zmm20 + +// CHECK: vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20 +// CHECK:  encoding: [0x62,0xe2,0xd5,0x58,0x77,0xa2,0xf8,0xfb,0xff,0xff] +          vpermi2pd -1032(%rdx){1to8}, %zmm5, %zmm20 + diff --git a/llvm/test/MC/X86/x86-64-avx512bw.s b/llvm/test/MC/X86/x86-64-avx512bw.s index b81e3adffd2..132948527b5 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw.s +++ b/llvm/test/MC/X86/x86-64-avx512bw.s @@ -3343,3 +3343,75 @@  // CHECK: vpermw -8256(%rdx), %zmm19, %zmm22  // CHECK:  encoding: [0x62,0xe2,0xe5,0x40,0x8d,0xb2,0xc0,0xdf,0xff,0xff]            vpermw -8256(%rdx), %zmm19, %zmm22 + +// CHECK: vpermi2w %zmm24, %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0x82,0xbd,0x40,0x75,0xc8] +          vpermi2w %zmm24, %zmm24, %zmm17 + +// CHECK: vpermi2w %zmm24, %zmm24, %zmm17 {%k7} +// CHECK:  encoding: [0x62,0x82,0xbd,0x47,0x75,0xc8] +          vpermi2w %zmm24, %zmm24, %zmm17 {%k7} + +// CHECK: vpermi2w %zmm24, %zmm24, %zmm17 {%k7} {z} +// CHECK:  encoding: [0x62,0x82,0xbd,0xc7,0x75,0xc8] +          vpermi2w %zmm24, %zmm24, %zmm17 {%k7} {z} + +// CHECK: vpermi2w (%rcx), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x75,0x09] +          vpermi2w (%rcx), %zmm24, %zmm17 + +// CHECK: vpermi2w 291(%rax,%r14,8), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xa2,0xbd,0x40,0x75,0x8c,0xf0,0x23,0x01,0x00,0x00] +          vpermi2w 291(%rax,%r14,8), %zmm24, %zmm17 + +// CHECK: vpermi2w 8128(%rdx), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x75,0x4a,0x7f] +          vpermi2w 8128(%rdx), %zmm24, %zmm17 + +// CHECK: vpermi2w 8192(%rdx), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x75,0x8a,0x00,0x20,0x00,0x00] +          vpermi2w 8192(%rdx), %zmm24, %zmm17 + +// CHECK: vpermi2w -8192(%rdx), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x75,0x4a,0x80] +          vpermi2w -8192(%rdx), %zmm24, %zmm17 + +// CHECK: vpermi2w -8256(%rdx), %zmm24, %zmm17 +// CHECK:  encoding: [0x62,0xe2,0xbd,0x40,0x75,0x8a,0xc0,0xdf,0xff,0xff] +          vpermi2w -8256(%rdx), %zmm24, %zmm17 + +// CHECK: vpermt2w %zmm19, %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xa2,0xb5,0x40,0x7d,0xd3] +          vpermt2w %zmm19, %zmm25, %zmm18 + +// CHECK: vpermt2w %zmm19, %zmm25, %zmm18 {%k2} +// CHECK:  encoding: [0x62,0xa2,0xb5,0x42,0x7d,0xd3] +          vpermt2w %zmm19, %zmm25, %zmm18 {%k2} + +// CHECK: vpermt2w %zmm19, %zmm25, %zmm18 {%k2} {z} +// CHECK:  encoding: [0x62,0xa2,0xb5,0xc2,0x7d,0xd3] +          vpermt2w %zmm19, %zmm25, %zmm18 {%k2} {z} + +// CHECK: vpermt2w (%rcx), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x7d,0x11] +          vpermt2w (%rcx), %zmm25, %zmm18 + +// CHECK: vpermt2w 291(%rax,%r14,8), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xa2,0xb5,0x40,0x7d,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermt2w 291(%rax,%r14,8), %zmm25, %zmm18 + +// CHECK: vpermt2w 8128(%rdx), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x7d,0x52,0x7f] +          vpermt2w 8128(%rdx), %zmm25, %zmm18 + +// CHECK: vpermt2w 8192(%rdx), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x7d,0x92,0x00,0x20,0x00,0x00] +          vpermt2w 8192(%rdx), %zmm25, %zmm18 + +// CHECK: vpermt2w -8192(%rdx), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x7d,0x52,0x80] +          vpermt2w -8192(%rdx), %zmm25, %zmm18 + +// CHECK: vpermt2w -8256(%rdx), %zmm25, %zmm18 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x40,0x7d,0x92,0xc0,0xdf,0xff,0xff] +          vpermt2w -8256(%rdx), %zmm25, %zmm18 diff --git a/llvm/test/MC/X86/x86-64-avx512bw_vl.s b/llvm/test/MC/X86/x86-64-avx512bw_vl.s index 0ba5e17077b..1f37fc945f9 100644 --- a/llvm/test/MC/X86/x86-64-avx512bw_vl.s +++ b/llvm/test/MC/X86/x86-64-avx512bw_vl.s @@ -5936,3 +5936,146 @@  // CHECK:  encoding: [0x62,0x61,0xff,0x28,0x70,0x8a,0xe0,0xef,0xff,0xff,0x7b]            vpshuflw $123, -4128(%rdx), %ymm25 +// CHECK: vpermi2w %xmm21, %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xa2,0x95,0x00,0x75,0xdd] +          vpermi2w %xmm21, %xmm29, %xmm19 + +// CHECK: vpermi2w %xmm21, %xmm29, %xmm19 {%k2} +// CHECK:  encoding: [0x62,0xa2,0x95,0x02,0x75,0xdd] +          vpermi2w %xmm21, %xmm29, %xmm19 {%k2} + +// CHECK: vpermi2w %xmm21, %xmm29, %xmm19 {%k2} {z} +// CHECK:  encoding: [0x62,0xa2,0x95,0x82,0x75,0xdd] +          vpermi2w %xmm21, %xmm29, %xmm19 {%k2} {z} + +// CHECK: vpermi2w (%rcx), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x95,0x00,0x75,0x19] +          vpermi2w (%rcx), %xmm29, %xmm19 + +// CHECK: vpermi2w 291(%rax,%r14,8), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xa2,0x95,0x00,0x75,0x9c,0xf0,0x23,0x01,0x00,0x00] +          vpermi2w 291(%rax,%r14,8), %xmm29, %xmm19 + +// CHECK: vpermi2w 2032(%rdx), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x95,0x00,0x75,0x5a,0x7f] +          vpermi2w 2032(%rdx), %xmm29, %xmm19 + +// CHECK: vpermi2w 2048(%rdx), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x95,0x00,0x75,0x9a,0x00,0x08,0x00,0x00] +          vpermi2w 2048(%rdx), %xmm29, %xmm19 + +// CHECK: vpermi2w -2048(%rdx), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x95,0x00,0x75,0x5a,0x80] +          vpermi2w -2048(%rdx), %xmm29, %xmm19 + +// CHECK: vpermi2w -2064(%rdx), %xmm29, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x95,0x00,0x75,0x9a,0xf0,0xf7,0xff,0xff] +          vpermi2w -2064(%rdx), %xmm29, %xmm19 + +// CHECK: vpermi2w %ymm19, %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x22,0xb5,0x20,0x75,0xf3] +          vpermi2w %ymm19, %ymm25, %ymm30 + +// CHECK: vpermi2w %ymm19, %ymm25, %ymm30 {%k3} +// CHECK:  encoding: [0x62,0x22,0xb5,0x23,0x75,0xf3] +          vpermi2w %ymm19, %ymm25, %ymm30 {%k3} + +// CHECK: vpermi2w %ymm19, %ymm25, %ymm30 {%k3} {z} +// CHECK:  encoding: [0x62,0x22,0xb5,0xa3,0x75,0xf3] +          vpermi2w %ymm19, %ymm25, %ymm30 {%k3} {z} + +// CHECK: vpermi2w (%rcx), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x75,0x31] +          vpermi2w (%rcx), %ymm25, %ymm30 + +// CHECK: vpermi2w 291(%rax,%r14,8), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x22,0xb5,0x20,0x75,0xb4,0xf0,0x23,0x01,0x00,0x00] +          vpermi2w 291(%rax,%r14,8), %ymm25, %ymm30 + +// CHECK: vpermi2w 4064(%rdx), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x75,0x72,0x7f] +          vpermi2w 4064(%rdx), %ymm25, %ymm30 + +// CHECK: vpermi2w 4096(%rdx), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x75,0xb2,0x00,0x10,0x00,0x00] +          vpermi2w 4096(%rdx), %ymm25, %ymm30 + +// CHECK: vpermi2w -4096(%rdx), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x75,0x72,0x80] +          vpermi2w -4096(%rdx), %ymm25, %ymm30 + +// CHECK: vpermi2w -4128(%rdx), %ymm25, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xb5,0x20,0x75,0xb2,0xe0,0xef,0xff,0xff] +          vpermi2w -4128(%rdx), %ymm25, %ymm30 + +// CHECK: vpermt2w %xmm25, %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0x82,0xcd,0x00,0x7d,0xd1] +          vpermt2w %xmm25, %xmm22, %xmm18 + +// CHECK: vpermt2w %xmm25, %xmm22, %xmm18 {%k6} +// CHECK:  encoding: [0x62,0x82,0xcd,0x06,0x7d,0xd1] +          vpermt2w %xmm25, %xmm22, %xmm18 {%k6} + +// CHECK: vpermt2w %xmm25, %xmm22, %xmm18 {%k6} {z} +// CHECK:  encoding: [0x62,0x82,0xcd,0x86,0x7d,0xd1] +          vpermt2w %xmm25, %xmm22, %xmm18 {%k6} {z} + +// CHECK: vpermt2w (%rcx), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x7d,0x11] +          vpermt2w (%rcx), %xmm22, %xmm18 + +// CHECK: vpermt2w 291(%rax,%r14,8), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xa2,0xcd,0x00,0x7d,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermt2w 291(%rax,%r14,8), %xmm22, %xmm18 + +// CHECK: vpermt2w 2032(%rdx), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x7d,0x52,0x7f] +          vpermt2w 2032(%rdx), %xmm22, %xmm18 + +// CHECK: vpermt2w 2048(%rdx), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x7d,0x92,0x00,0x08,0x00,0x00] +          vpermt2w 2048(%rdx), %xmm22, %xmm18 + +// CHECK: vpermt2w -2048(%rdx), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x7d,0x52,0x80] +          vpermt2w -2048(%rdx), %xmm22, %xmm18 + +// CHECK: vpermt2w -2064(%rdx), %xmm22, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0xcd,0x00,0x7d,0x92,0xf0,0xf7,0xff,0xff] +          vpermt2w -2064(%rdx), %xmm22, %xmm18 + +// CHECK: vpermt2w %ymm26, %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x02,0xc5,0x20,0x7d,0xe2] +          vpermt2w %ymm26, %ymm23, %ymm28 + +// CHECK: vpermt2w %ymm26, %ymm23, %ymm28 {%k4} +// CHECK:  encoding: [0x62,0x02,0xc5,0x24,0x7d,0xe2] +          vpermt2w %ymm26, %ymm23, %ymm28 {%k4} + +// CHECK: vpermt2w %ymm26, %ymm23, %ymm28 {%k4} {z} +// CHECK:  encoding: [0x62,0x02,0xc5,0xa4,0x7d,0xe2] +          vpermt2w %ymm26, %ymm23, %ymm28 {%k4} {z} + +// CHECK: vpermt2w (%rcx), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x62,0xc5,0x20,0x7d,0x21] +          vpermt2w (%rcx), %ymm23, %ymm28 + +// CHECK: vpermt2w 291(%rax,%r14,8), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x22,0xc5,0x20,0x7d,0xa4,0xf0,0x23,0x01,0x00,0x00] +          vpermt2w 291(%rax,%r14,8), %ymm23, %ymm28 + +// CHECK: vpermt2w 4064(%rdx), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x62,0xc5,0x20,0x7d,0x62,0x7f] +          vpermt2w 4064(%rdx), %ymm23, %ymm28 + +// CHECK: vpermt2w 4096(%rdx), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x62,0xc5,0x20,0x7d,0xa2,0x00,0x10,0x00,0x00] +          vpermt2w 4096(%rdx), %ymm23, %ymm28 + +// CHECK: vpermt2w -4096(%rdx), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x62,0xc5,0x20,0x7d,0x62,0x80] +          vpermt2w -4096(%rdx), %ymm23, %ymm28 + +// CHECK: vpermt2w -4128(%rdx), %ymm23, %ymm28 +// CHECK:  encoding: [0x62,0x62,0xc5,0x20,0x7d,0xa2,0xe0,0xef,0xff,0xff] +          vpermt2w -4128(%rdx), %ymm23, %ymm28 diff --git a/llvm/test/MC/X86/x86-64-avx512f_vl.s b/llvm/test/MC/X86/x86-64-avx512f_vl.s index f521b3e42d4..1381b2e76e1 100644 --- a/llvm/test/MC/X86/x86-64-avx512f_vl.s +++ b/llvm/test/MC/X86/x86-64-avx512f_vl.s @@ -11132,3 +11132,899 @@ vaddpd  {rz-sae}, %zmm2, %zmm1, %zmm1  // CHECK: valignq $123, -1032(%rdx){1to4}, %ymm24, %ymm25  // CHECK:  encoding: [0x62,0x63,0xbd,0x30,0x03,0x8a,0xf8,0xfb,0xff,0xff,0x7b]            valignq $0x7b, -1032(%rdx){1to4}, %ymm24, %ymm25 + +// CHECK: vpermi2d %xmm25, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0x82,0x45,0x00,0x76,0xe9] +          vpermi2d %xmm25, %xmm23, %xmm21 + +// CHECK: vpermi2d %xmm25, %xmm23, %xmm21 {%k6} +// CHECK:  encoding: [0x62,0x82,0x45,0x06,0x76,0xe9] +          vpermi2d %xmm25, %xmm23, %xmm21 {%k6} + +// CHECK: vpermi2d %xmm25, %xmm23, %xmm21 {%k6} {z} +// CHECK:  encoding: [0x62,0x82,0x45,0x86,0x76,0xe9] +          vpermi2d %xmm25, %xmm23, %xmm21 {%k6} {z} + +// CHECK: vpermi2d (%rcx), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x76,0x29] +          vpermi2d (%rcx), %xmm23, %xmm21 + +// CHECK: vpermi2d 291(%rax,%r14,8), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xa2,0x45,0x00,0x76,0xac,0xf0,0x23,0x01,0x00,0x00] +          vpermi2d 291(%rax,%r14,8), %xmm23, %xmm21 + +// CHECK: vpermi2d (%rcx){1to4}, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x76,0x29] +          vpermi2d (%rcx){1to4}, %xmm23, %xmm21 + +// CHECK: vpermi2d 2032(%rdx), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x76,0x6a,0x7f] +          vpermi2d 2032(%rdx), %xmm23, %xmm21 + +// CHECK: vpermi2d 2048(%rdx), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x76,0xaa,0x00,0x08,0x00,0x00] +          vpermi2d 2048(%rdx), %xmm23, %xmm21 + +// CHECK: vpermi2d -2048(%rdx), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x76,0x6a,0x80] +          vpermi2d -2048(%rdx), %xmm23, %xmm21 + +// CHECK: vpermi2d -2064(%rdx), %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x76,0xaa,0xf0,0xf7,0xff,0xff] +          vpermi2d -2064(%rdx), %xmm23, %xmm21 + +// CHECK: vpermi2d 508(%rdx){1to4}, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x76,0x6a,0x7f] +          vpermi2d 508(%rdx){1to4}, %xmm23, %xmm21 + +// CHECK: vpermi2d 512(%rdx){1to4}, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x76,0xaa,0x00,0x02,0x00,0x00] +          vpermi2d 512(%rdx){1to4}, %xmm23, %xmm21 + +// CHECK: vpermi2d -512(%rdx){1to4}, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x76,0x6a,0x80] +          vpermi2d -512(%rdx){1to4}, %xmm23, %xmm21 + +// CHECK: vpermi2d -516(%rdx){1to4}, %xmm23, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x76,0xaa,0xfc,0xfd,0xff,0xff] +          vpermi2d -516(%rdx){1to4}, %xmm23, %xmm21 + +// CHECK: vpermi2d %ymm22, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x76,0xd6] +          vpermi2d %ymm22, %ymm24, %ymm18 + +// CHECK: vpermi2d %ymm22, %ymm24, %ymm18 {%k1} +// CHECK:  encoding: [0x62,0xa2,0x3d,0x21,0x76,0xd6] +          vpermi2d %ymm22, %ymm24, %ymm18 {%k1} + +// CHECK: vpermi2d %ymm22, %ymm24, %ymm18 {%k1} {z} +// CHECK:  encoding: [0x62,0xa2,0x3d,0xa1,0x76,0xd6] +          vpermi2d %ymm22, %ymm24, %ymm18 {%k1} {z} + +// CHECK: vpermi2d (%rcx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x76,0x11] +          vpermi2d (%rcx), %ymm24, %ymm18 + +// CHECK: vpermi2d 291(%rax,%r14,8), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x76,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2d 291(%rax,%r14,8), %ymm24, %ymm18 + +// CHECK: vpermi2d (%rcx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x76,0x11] +          vpermi2d (%rcx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2d 4064(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x76,0x52,0x7f] +          vpermi2d 4064(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2d 4096(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x76,0x92,0x00,0x10,0x00,0x00] +          vpermi2d 4096(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2d -4096(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x76,0x52,0x80] +          vpermi2d -4096(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2d -4128(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x76,0x92,0xe0,0xef,0xff,0xff] +          vpermi2d -4128(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2d 508(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x76,0x52,0x7f] +          vpermi2d 508(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2d 512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x76,0x92,0x00,0x02,0x00,0x00] +          vpermi2d 512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2d -512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x76,0x52,0x80] +          vpermi2d -512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2d -516(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x76,0x92,0xfc,0xfd,0xff,0xff] +          vpermi2d -516(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2q %xmm17, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xa2,0x9d,0x00,0x76,0xd1] +          vpermi2q %xmm17, %xmm28, %xmm18 + +// CHECK: vpermi2q %xmm17, %xmm28, %xmm18 {%k3} +// CHECK:  encoding: [0x62,0xa2,0x9d,0x03,0x76,0xd1] +          vpermi2q %xmm17, %xmm28, %xmm18 {%k3} + +// CHECK: vpermi2q %xmm17, %xmm28, %xmm18 {%k3} {z} +// CHECK:  encoding: [0x62,0xa2,0x9d,0x83,0x76,0xd1] +          vpermi2q %xmm17, %xmm28, %xmm18 {%k3} {z} + +// CHECK: vpermi2q (%rcx), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x00,0x76,0x11] +          vpermi2q (%rcx), %xmm28, %xmm18 + +// CHECK: vpermi2q 291(%rax,%r14,8), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xa2,0x9d,0x00,0x76,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2q 291(%rax,%r14,8), %xmm28, %xmm18 + +// CHECK: vpermi2q (%rcx){1to2}, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x10,0x76,0x11] +          vpermi2q (%rcx){1to2}, %xmm28, %xmm18 + +// CHECK: vpermi2q 2032(%rdx), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x00,0x76,0x52,0x7f] +          vpermi2q 2032(%rdx), %xmm28, %xmm18 + +// CHECK: vpermi2q 2048(%rdx), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x00,0x76,0x92,0x00,0x08,0x00,0x00] +          vpermi2q 2048(%rdx), %xmm28, %xmm18 + +// CHECK: vpermi2q -2048(%rdx), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x00,0x76,0x52,0x80] +          vpermi2q -2048(%rdx), %xmm28, %xmm18 + +// CHECK: vpermi2q -2064(%rdx), %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x00,0x76,0x92,0xf0,0xf7,0xff,0xff] +          vpermi2q -2064(%rdx), %xmm28, %xmm18 + +// CHECK: vpermi2q 1016(%rdx){1to2}, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x10,0x76,0x52,0x7f] +          vpermi2q 1016(%rdx){1to2}, %xmm28, %xmm18 + +// CHECK: vpermi2q 1024(%rdx){1to2}, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x10,0x76,0x92,0x00,0x04,0x00,0x00] +          vpermi2q 1024(%rdx){1to2}, %xmm28, %xmm18 + +// CHECK: vpermi2q -1024(%rdx){1to2}, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x10,0x76,0x52,0x80] +          vpermi2q -1024(%rdx){1to2}, %xmm28, %xmm18 + +// CHECK: vpermi2q -1032(%rdx){1to2}, %xmm28, %xmm18 +// CHECK:  encoding: [0x62,0xe2,0x9d,0x10,0x76,0x92,0xf8,0xfb,0xff,0xff] +          vpermi2q -1032(%rdx){1to2}, %xmm28, %xmm18 + +// CHECK: vpermi2q %ymm23, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x22,0xcd,0x20,0x76,0xd7] +          vpermi2q %ymm23, %ymm22, %ymm26 + +// CHECK: vpermi2q %ymm23, %ymm22, %ymm26 {%k2} +// CHECK:  encoding: [0x62,0x22,0xcd,0x22,0x76,0xd7] +          vpermi2q %ymm23, %ymm22, %ymm26 {%k2} + +// CHECK: vpermi2q %ymm23, %ymm22, %ymm26 {%k2} {z} +// CHECK:  encoding: [0x62,0x22,0xcd,0xa2,0x76,0xd7] +          vpermi2q %ymm23, %ymm22, %ymm26 {%k2} {z} + +// CHECK: vpermi2q (%rcx), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x76,0x11] +          vpermi2q (%rcx), %ymm22, %ymm26 + +// CHECK: vpermi2q 291(%rax,%r14,8), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x22,0xcd,0x20,0x76,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2q 291(%rax,%r14,8), %ymm22, %ymm26 + +// CHECK: vpermi2q (%rcx){1to4}, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x76,0x11] +          vpermi2q (%rcx){1to4}, %ymm22, %ymm26 + +// CHECK: vpermi2q 4064(%rdx), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x76,0x52,0x7f] +          vpermi2q 4064(%rdx), %ymm22, %ymm26 + +// CHECK: vpermi2q 4096(%rdx), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x76,0x92,0x00,0x10,0x00,0x00] +          vpermi2q 4096(%rdx), %ymm22, %ymm26 + +// CHECK: vpermi2q -4096(%rdx), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x76,0x52,0x80] +          vpermi2q -4096(%rdx), %ymm22, %ymm26 + +// CHECK: vpermi2q -4128(%rdx), %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x20,0x76,0x92,0xe0,0xef,0xff,0xff] +          vpermi2q -4128(%rdx), %ymm22, %ymm26 + +// CHECK: vpermi2q 1016(%rdx){1to4}, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x76,0x52,0x7f] +          vpermi2q 1016(%rdx){1to4}, %ymm22, %ymm26 + +// CHECK: vpermi2q 1024(%rdx){1to4}, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x76,0x92,0x00,0x04,0x00,0x00] +          vpermi2q 1024(%rdx){1to4}, %ymm22, %ymm26 + +// CHECK: vpermi2q -1024(%rdx){1to4}, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x76,0x52,0x80] +          vpermi2q -1024(%rdx){1to4}, %ymm22, %ymm26 + +// CHECK: vpermi2q -1032(%rdx){1to4}, %ymm22, %ymm26 +// CHECK:  encoding: [0x62,0x62,0xcd,0x30,0x76,0x92,0xf8,0xfb,0xff,0xff] +          vpermi2q -1032(%rdx){1to4}, %ymm22, %ymm26 + +// CHECK: vpermi2ps %xmm23, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x00,0x77,0xff] +          vpermi2ps %xmm23, %xmm24, %xmm23 + +// CHECK: vpermi2ps %xmm23, %xmm24, %xmm23 {%k3} +// CHECK:  encoding: [0x62,0xa2,0x3d,0x03,0x77,0xff] +          vpermi2ps %xmm23, %xmm24, %xmm23 {%k3} + +// CHECK: vpermi2ps %xmm23, %xmm24, %xmm23 {%k3} {z} +// CHECK:  encoding: [0x62,0xa2,0x3d,0x83,0x77,0xff] +          vpermi2ps %xmm23, %xmm24, %xmm23 {%k3} {z} + +// CHECK: vpermi2ps (%rcx), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x77,0x39] +          vpermi2ps (%rcx), %xmm24, %xmm23 + +// CHECK: vpermi2ps 291(%rax,%r14,8), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x00,0x77,0xbc,0xf0,0x23,0x01,0x00,0x00] +          vpermi2ps 291(%rax,%r14,8), %xmm24, %xmm23 + +// CHECK: vpermi2ps (%rcx){1to4}, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x10,0x77,0x39] +          vpermi2ps (%rcx){1to4}, %xmm24, %xmm23 + +// CHECK: vpermi2ps 2032(%rdx), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x77,0x7a,0x7f] +          vpermi2ps 2032(%rdx), %xmm24, %xmm23 + +// CHECK: vpermi2ps 2048(%rdx), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x77,0xba,0x00,0x08,0x00,0x00] +          vpermi2ps 2048(%rdx), %xmm24, %xmm23 + +// CHECK: vpermi2ps -2048(%rdx), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x77,0x7a,0x80] +          vpermi2ps -2048(%rdx), %xmm24, %xmm23 + +// CHECK: vpermi2ps -2064(%rdx), %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x00,0x77,0xba,0xf0,0xf7,0xff,0xff] +          vpermi2ps -2064(%rdx), %xmm24, %xmm23 + +// CHECK: vpermi2ps 508(%rdx){1to4}, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x10,0x77,0x7a,0x7f] +          vpermi2ps 508(%rdx){1to4}, %xmm24, %xmm23 + +// CHECK: vpermi2ps 512(%rdx){1to4}, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x10,0x77,0xba,0x00,0x02,0x00,0x00] +          vpermi2ps 512(%rdx){1to4}, %xmm24, %xmm23 + +// CHECK: vpermi2ps -512(%rdx){1to4}, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x10,0x77,0x7a,0x80] +          vpermi2ps -512(%rdx){1to4}, %xmm24, %xmm23 + +// CHECK: vpermi2ps -516(%rdx){1to4}, %xmm24, %xmm23 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x10,0x77,0xba,0xfc,0xfd,0xff,0xff] +          vpermi2ps -516(%rdx){1to4}, %xmm24, %xmm23 + +// CHECK: vpermi2ps %ymm20, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x77,0xd4] +          vpermi2ps %ymm20, %ymm24, %ymm18 + +// CHECK: vpermi2ps %ymm20, %ymm24, %ymm18 {%k5} +// CHECK:  encoding: [0x62,0xa2,0x3d,0x25,0x77,0xd4] +          vpermi2ps %ymm20, %ymm24, %ymm18 {%k5} + +// CHECK: vpermi2ps %ymm20, %ymm24, %ymm18 {%k5} {z} +// CHECK:  encoding: [0x62,0xa2,0x3d,0xa5,0x77,0xd4] +          vpermi2ps %ymm20, %ymm24, %ymm18 {%k5} {z} + +// CHECK: vpermi2ps (%rcx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x77,0x11] +          vpermi2ps (%rcx), %ymm24, %ymm18 + +// CHECK: vpermi2ps 291(%rax,%r14,8), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0x3d,0x20,0x77,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermi2ps 291(%rax,%r14,8), %ymm24, %ymm18 + +// CHECK: vpermi2ps (%rcx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x77,0x11] +          vpermi2ps (%rcx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2ps 4064(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x77,0x52,0x7f] +          vpermi2ps 4064(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2ps 4096(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x77,0x92,0x00,0x10,0x00,0x00] +          vpermi2ps 4096(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2ps -4096(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x77,0x52,0x80] +          vpermi2ps -4096(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2ps -4128(%rdx), %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x20,0x77,0x92,0xe0,0xef,0xff,0xff] +          vpermi2ps -4128(%rdx), %ymm24, %ymm18 + +// CHECK: vpermi2ps 508(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x77,0x52,0x7f] +          vpermi2ps 508(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2ps 512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x77,0x92,0x00,0x02,0x00,0x00] +          vpermi2ps 512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2ps -512(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x77,0x52,0x80] +          vpermi2ps -512(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2ps -516(%rdx){1to8}, %ymm24, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0x3d,0x30,0x77,0x92,0xfc,0xfd,0xff,0xff] +          vpermi2ps -516(%rdx){1to8}, %ymm24, %ymm18 + +// CHECK: vpermi2pd %xmm27, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x02,0xf5,0x00,0x77,0xe3] +          vpermi2pd %xmm27, %xmm17, %xmm28 + +// CHECK: vpermi2pd %xmm27, %xmm17, %xmm28 {%k4} +// CHECK:  encoding: [0x62,0x02,0xf5,0x04,0x77,0xe3] +          vpermi2pd %xmm27, %xmm17, %xmm28 {%k4} + +// CHECK: vpermi2pd %xmm27, %xmm17, %xmm28 {%k4} {z} +// CHECK:  encoding: [0x62,0x02,0xf5,0x84,0x77,0xe3] +          vpermi2pd %xmm27, %xmm17, %xmm28 {%k4} {z} + +// CHECK: vpermi2pd (%rcx), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x77,0x21] +          vpermi2pd (%rcx), %xmm17, %xmm28 + +// CHECK: vpermi2pd 291(%rax,%r14,8), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x22,0xf5,0x00,0x77,0xa4,0xf0,0x23,0x01,0x00,0x00] +          vpermi2pd 291(%rax,%r14,8), %xmm17, %xmm28 + +// CHECK: vpermi2pd (%rcx){1to2}, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x77,0x21] +          vpermi2pd (%rcx){1to2}, %xmm17, %xmm28 + +// CHECK: vpermi2pd 2032(%rdx), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x77,0x62,0x7f] +          vpermi2pd 2032(%rdx), %xmm17, %xmm28 + +// CHECK: vpermi2pd 2048(%rdx), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x77,0xa2,0x00,0x08,0x00,0x00] +          vpermi2pd 2048(%rdx), %xmm17, %xmm28 + +// CHECK: vpermi2pd -2048(%rdx), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x77,0x62,0x80] +          vpermi2pd -2048(%rdx), %xmm17, %xmm28 + +// CHECK: vpermi2pd -2064(%rdx), %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x00,0x77,0xa2,0xf0,0xf7,0xff,0xff] +          vpermi2pd -2064(%rdx), %xmm17, %xmm28 + +// CHECK: vpermi2pd 1016(%rdx){1to2}, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x77,0x62,0x7f] +          vpermi2pd 1016(%rdx){1to2}, %xmm17, %xmm28 + +// CHECK: vpermi2pd 1024(%rdx){1to2}, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x77,0xa2,0x00,0x04,0x00,0x00] +          vpermi2pd 1024(%rdx){1to2}, %xmm17, %xmm28 + +// CHECK: vpermi2pd -1024(%rdx){1to2}, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x77,0x62,0x80] +          vpermi2pd -1024(%rdx){1to2}, %xmm17, %xmm28 + +// CHECK: vpermi2pd -1032(%rdx){1to2}, %xmm17, %xmm28 +// CHECK:  encoding: [0x62,0x62,0xf5,0x10,0x77,0xa2,0xf8,0xfb,0xff,0xff] +          vpermi2pd -1032(%rdx){1to2}, %xmm17, %xmm28 + +// CHECK: vpermi2pd %ymm27, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x02,0xe5,0x20,0x77,0xf3] +          vpermi2pd %ymm27, %ymm19, %ymm30 + +// CHECK: vpermi2pd %ymm27, %ymm19, %ymm30 {%k3} +// CHECK:  encoding: [0x62,0x02,0xe5,0x23,0x77,0xf3] +          vpermi2pd %ymm27, %ymm19, %ymm30 {%k3} + +// CHECK: vpermi2pd %ymm27, %ymm19, %ymm30 {%k3} {z} +// CHECK:  encoding: [0x62,0x02,0xe5,0xa3,0x77,0xf3] +          vpermi2pd %ymm27, %ymm19, %ymm30 {%k3} {z} + +// CHECK: vpermi2pd (%rcx), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x20,0x77,0x31] +          vpermi2pd (%rcx), %ymm19, %ymm30 + +// CHECK: vpermi2pd 291(%rax,%r14,8), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x22,0xe5,0x20,0x77,0xb4,0xf0,0x23,0x01,0x00,0x00] +          vpermi2pd 291(%rax,%r14,8), %ymm19, %ymm30 + +// CHECK: vpermi2pd (%rcx){1to4}, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x30,0x77,0x31] +          vpermi2pd (%rcx){1to4}, %ymm19, %ymm30 + +// CHECK: vpermi2pd 4064(%rdx), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x20,0x77,0x72,0x7f] +          vpermi2pd 4064(%rdx), %ymm19, %ymm30 + +// CHECK: vpermi2pd 4096(%rdx), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x20,0x77,0xb2,0x00,0x10,0x00,0x00] +          vpermi2pd 4096(%rdx), %ymm19, %ymm30 + +// CHECK: vpermi2pd -4096(%rdx), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x20,0x77,0x72,0x80] +          vpermi2pd -4096(%rdx), %ymm19, %ymm30 + +// CHECK: vpermi2pd -4128(%rdx), %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x20,0x77,0xb2,0xe0,0xef,0xff,0xff] +          vpermi2pd -4128(%rdx), %ymm19, %ymm30 + +// CHECK: vpermi2pd 1016(%rdx){1to4}, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x30,0x77,0x72,0x7f] +          vpermi2pd 1016(%rdx){1to4}, %ymm19, %ymm30 + +// CHECK: vpermi2pd 1024(%rdx){1to4}, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x30,0x77,0xb2,0x00,0x04,0x00,0x00] +          vpermi2pd 1024(%rdx){1to4}, %ymm19, %ymm30 + +// CHECK: vpermi2pd -1024(%rdx){1to4}, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x30,0x77,0x72,0x80] +          vpermi2pd -1024(%rdx){1to4}, %ymm19, %ymm30 + +// CHECK: vpermi2pd -1032(%rdx){1to4}, %ymm19, %ymm30 +// CHECK:  encoding: [0x62,0x62,0xe5,0x30,0x77,0xb2,0xf8,0xfb,0xff,0xff] +          vpermi2pd -1032(%rdx){1to4}, %ymm19, %ymm30 + +// CHECK: vpermt2d %xmm23, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0x7e,0xef] +          vpermt2d %xmm23, %xmm29, %xmm21 + +// CHECK: vpermt2d %xmm23, %xmm29, %xmm21 {%k4} +// CHECK:  encoding: [0x62,0xa2,0x15,0x04,0x7e,0xef] +          vpermt2d %xmm23, %xmm29, %xmm21 {%k4} + +// CHECK: vpermt2d %xmm23, %xmm29, %xmm21 {%k4} {z} +// CHECK:  encoding: [0x62,0xa2,0x15,0x84,0x7e,0xef] +          vpermt2d %xmm23, %xmm29, %xmm21 {%k4} {z} + +// CHECK: vpermt2d (%rcx), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0x7e,0x29] +          vpermt2d (%rcx), %xmm29, %xmm21 + +// CHECK: vpermt2d 291(%rax,%r14,8), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xa2,0x15,0x00,0x7e,0xac,0xf0,0x23,0x01,0x00,0x00] +          vpermt2d 291(%rax,%r14,8), %xmm29, %xmm21 + +// CHECK: vpermt2d (%rcx){1to4}, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0x7e,0x29] +          vpermt2d (%rcx){1to4}, %xmm29, %xmm21 + +// CHECK: vpermt2d 2032(%rdx), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0x7e,0x6a,0x7f] +          vpermt2d 2032(%rdx), %xmm29, %xmm21 + +// CHECK: vpermt2d 2048(%rdx), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0x7e,0xaa,0x00,0x08,0x00,0x00] +          vpermt2d 2048(%rdx), %xmm29, %xmm21 + +// CHECK: vpermt2d -2048(%rdx), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0x7e,0x6a,0x80] +          vpermt2d -2048(%rdx), %xmm29, %xmm21 + +// CHECK: vpermt2d -2064(%rdx), %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x00,0x7e,0xaa,0xf0,0xf7,0xff,0xff] +          vpermt2d -2064(%rdx), %xmm29, %xmm21 + +// CHECK: vpermt2d 508(%rdx){1to4}, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0x7e,0x6a,0x7f] +          vpermt2d 508(%rdx){1to4}, %xmm29, %xmm21 + +// CHECK: vpermt2d 512(%rdx){1to4}, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0x7e,0xaa,0x00,0x02,0x00,0x00] +          vpermt2d 512(%rdx){1to4}, %xmm29, %xmm21 + +// CHECK: vpermt2d -512(%rdx){1to4}, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0x7e,0x6a,0x80] +          vpermt2d -512(%rdx){1to4}, %xmm29, %xmm21 + +// CHECK: vpermt2d -516(%rdx){1to4}, %xmm29, %xmm21 +// CHECK:  encoding: [0x62,0xe2,0x15,0x10,0x7e,0xaa,0xfc,0xfd,0xff,0xff] +          vpermt2d -516(%rdx){1to4}, %xmm29, %xmm21 + +// CHECK: vpermt2d %ymm21, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0x7e,0xf5] +          vpermt2d %ymm21, %ymm26, %ymm22 + +// CHECK: vpermt2d %ymm21, %ymm26, %ymm22 {%k2} +// CHECK:  encoding: [0x62,0xa2,0x2d,0x22,0x7e,0xf5] +          vpermt2d %ymm21, %ymm26, %ymm22 {%k2} + +// CHECK: vpermt2d %ymm21, %ymm26, %ymm22 {%k2} {z} +// CHECK:  encoding: [0x62,0xa2,0x2d,0xa2,0x7e,0xf5] +          vpermt2d %ymm21, %ymm26, %ymm22 {%k2} {z} + +// CHECK: vpermt2d (%rcx), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x7e,0x31] +          vpermt2d (%rcx), %ymm26, %ymm22 + +// CHECK: vpermt2d 291(%rax,%r14,8), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xa2,0x2d,0x20,0x7e,0xb4,0xf0,0x23,0x01,0x00,0x00] +          vpermt2d 291(%rax,%r14,8), %ymm26, %ymm22 + +// CHECK: vpermt2d (%rcx){1to8}, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x7e,0x31] +          vpermt2d (%rcx){1to8}, %ymm26, %ymm22 + +// CHECK: vpermt2d 4064(%rdx), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x7e,0x72,0x7f] +          vpermt2d 4064(%rdx), %ymm26, %ymm22 + +// CHECK: vpermt2d 4096(%rdx), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x7e,0xb2,0x00,0x10,0x00,0x00] +          vpermt2d 4096(%rdx), %ymm26, %ymm22 + +// CHECK: vpermt2d -4096(%rdx), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x7e,0x72,0x80] +          vpermt2d -4096(%rdx), %ymm26, %ymm22 + +// CHECK: vpermt2d -4128(%rdx), %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x20,0x7e,0xb2,0xe0,0xef,0xff,0xff] +          vpermt2d -4128(%rdx), %ymm26, %ymm22 + +// CHECK: vpermt2d 508(%rdx){1to8}, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x7e,0x72,0x7f] +          vpermt2d 508(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vpermt2d 512(%rdx){1to8}, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x7e,0xb2,0x00,0x02,0x00,0x00] +          vpermt2d 512(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vpermt2d -512(%rdx){1to8}, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x7e,0x72,0x80] +          vpermt2d -512(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vpermt2d -516(%rdx){1to8}, %ymm26, %ymm22 +// CHECK:  encoding: [0x62,0xe2,0x2d,0x30,0x7e,0xb2,0xfc,0xfd,0xff,0xff] +          vpermt2d -516(%rdx){1to8}, %ymm26, %ymm22 + +// CHECK: vpermt2q %xmm18, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xa2,0xb5,0x00,0x7e,0xf2] +          vpermt2q %xmm18, %xmm25, %xmm22 + +// CHECK: vpermt2q %xmm18, %xmm25, %xmm22 {%k1} +// CHECK:  encoding: [0x62,0xa2,0xb5,0x01,0x7e,0xf2] +          vpermt2q %xmm18, %xmm25, %xmm22 {%k1} + +// CHECK: vpermt2q %xmm18, %xmm25, %xmm22 {%k1} {z} +// CHECK:  encoding: [0x62,0xa2,0xb5,0x81,0x7e,0xf2] +          vpermt2q %xmm18, %xmm25, %xmm22 {%k1} {z} + +// CHECK: vpermt2q (%rcx), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x7e,0x31] +          vpermt2q (%rcx), %xmm25, %xmm22 + +// CHECK: vpermt2q 291(%rax,%r14,8), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xa2,0xb5,0x00,0x7e,0xb4,0xf0,0x23,0x01,0x00,0x00] +          vpermt2q 291(%rax,%r14,8), %xmm25, %xmm22 + +// CHECK: vpermt2q (%rcx){1to2}, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x7e,0x31] +          vpermt2q (%rcx){1to2}, %xmm25, %xmm22 + +// CHECK: vpermt2q 2032(%rdx), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x7e,0x72,0x7f] +          vpermt2q 2032(%rdx), %xmm25, %xmm22 + +// CHECK: vpermt2q 2048(%rdx), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x7e,0xb2,0x00,0x08,0x00,0x00] +          vpermt2q 2048(%rdx), %xmm25, %xmm22 + +// CHECK: vpermt2q -2048(%rdx), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x7e,0x72,0x80] +          vpermt2q -2048(%rdx), %xmm25, %xmm22 + +// CHECK: vpermt2q -2064(%rdx), %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x00,0x7e,0xb2,0xf0,0xf7,0xff,0xff] +          vpermt2q -2064(%rdx), %xmm25, %xmm22 + +// CHECK: vpermt2q 1016(%rdx){1to2}, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x7e,0x72,0x7f] +          vpermt2q 1016(%rdx){1to2}, %xmm25, %xmm22 + +// CHECK: vpermt2q 1024(%rdx){1to2}, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x7e,0xb2,0x00,0x04,0x00,0x00] +          vpermt2q 1024(%rdx){1to2}, %xmm25, %xmm22 + +// CHECK: vpermt2q -1024(%rdx){1to2}, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x7e,0x72,0x80] +          vpermt2q -1024(%rdx){1to2}, %xmm25, %xmm22 + +// CHECK: vpermt2q -1032(%rdx){1to2}, %xmm25, %xmm22 +// CHECK:  encoding: [0x62,0xe2,0xb5,0x10,0x7e,0xb2,0xf8,0xfb,0xff,0xff] +          vpermt2q -1032(%rdx){1to2}, %xmm25, %xmm22 + +// CHECK: vpermt2q %ymm20, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0xf5,0x20,0x7e,0xd4] +          vpermt2q %ymm20, %ymm17, %ymm18 + +// CHECK: vpermt2q %ymm20, %ymm17, %ymm18 {%k6} +// CHECK:  encoding: [0x62,0xa2,0xf5,0x26,0x7e,0xd4] +          vpermt2q %ymm20, %ymm17, %ymm18 {%k6} + +// CHECK: vpermt2q %ymm20, %ymm17, %ymm18 {%k6} {z} +// CHECK:  encoding: [0x62,0xa2,0xf5,0xa6,0x7e,0xd4] +          vpermt2q %ymm20, %ymm17, %ymm18 {%k6} {z} + +// CHECK: vpermt2q (%rcx), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0x7e,0x11] +          vpermt2q (%rcx), %ymm17, %ymm18 + +// CHECK: vpermt2q 291(%rax,%r14,8), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xa2,0xf5,0x20,0x7e,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermt2q 291(%rax,%r14,8), %ymm17, %ymm18 + +// CHECK: vpermt2q (%rcx){1to4}, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0x7e,0x11] +          vpermt2q (%rcx){1to4}, %ymm17, %ymm18 + +// CHECK: vpermt2q 4064(%rdx), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0x7e,0x52,0x7f] +          vpermt2q 4064(%rdx), %ymm17, %ymm18 + +// CHECK: vpermt2q 4096(%rdx), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0x7e,0x92,0x00,0x10,0x00,0x00] +          vpermt2q 4096(%rdx), %ymm17, %ymm18 + +// CHECK: vpermt2q -4096(%rdx), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0x7e,0x52,0x80] +          vpermt2q -4096(%rdx), %ymm17, %ymm18 + +// CHECK: vpermt2q -4128(%rdx), %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x20,0x7e,0x92,0xe0,0xef,0xff,0xff] +          vpermt2q -4128(%rdx), %ymm17, %ymm18 + +// CHECK: vpermt2q 1016(%rdx){1to4}, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0x7e,0x52,0x7f] +          vpermt2q 1016(%rdx){1to4}, %ymm17, %ymm18 + +// CHECK: vpermt2q 1024(%rdx){1to4}, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0x7e,0x92,0x00,0x04,0x00,0x00] +          vpermt2q 1024(%rdx){1to4}, %ymm17, %ymm18 + +// CHECK: vpermt2q -1024(%rdx){1to4}, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0x7e,0x52,0x80] +          vpermt2q -1024(%rdx){1to4}, %ymm17, %ymm18 + +// CHECK: vpermt2q -1032(%rdx){1to4}, %ymm17, %ymm18 +// CHECK:  encoding: [0x62,0xe2,0xf5,0x30,0x7e,0x92,0xf8,0xfb,0xff,0xff] +          vpermt2q -1032(%rdx){1to4}, %ymm17, %ymm18 + +// CHECK: vpermt2ps %xmm18, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xa2,0x45,0x00,0x7f,0xda] +          vpermt2ps %xmm18, %xmm23, %xmm19 + +// CHECK: vpermt2ps %xmm18, %xmm23, %xmm19 {%k1} +// CHECK:  encoding: [0x62,0xa2,0x45,0x01,0x7f,0xda] +          vpermt2ps %xmm18, %xmm23, %xmm19 {%k1} + +// CHECK: vpermt2ps %xmm18, %xmm23, %xmm19 {%k1} {z} +// CHECK:  encoding: [0x62,0xa2,0x45,0x81,0x7f,0xda] +          vpermt2ps %xmm18, %xmm23, %xmm19 {%k1} {z} + +// CHECK: vpermt2ps (%rcx), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x7f,0x19] +          vpermt2ps (%rcx), %xmm23, %xmm19 + +// CHECK: vpermt2ps 291(%rax,%r14,8), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xa2,0x45,0x00,0x7f,0x9c,0xf0,0x23,0x01,0x00,0x00] +          vpermt2ps 291(%rax,%r14,8), %xmm23, %xmm19 + +// CHECK: vpermt2ps (%rcx){1to4}, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x7f,0x19] +          vpermt2ps (%rcx){1to4}, %xmm23, %xmm19 + +// CHECK: vpermt2ps 2032(%rdx), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x7f,0x5a,0x7f] +          vpermt2ps 2032(%rdx), %xmm23, %xmm19 + +// CHECK: vpermt2ps 2048(%rdx), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x7f,0x9a,0x00,0x08,0x00,0x00] +          vpermt2ps 2048(%rdx), %xmm23, %xmm19 + +// CHECK: vpermt2ps -2048(%rdx), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x7f,0x5a,0x80] +          vpermt2ps -2048(%rdx), %xmm23, %xmm19 + +// CHECK: vpermt2ps -2064(%rdx), %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x00,0x7f,0x9a,0xf0,0xf7,0xff,0xff] +          vpermt2ps -2064(%rdx), %xmm23, %xmm19 + +// CHECK: vpermt2ps 508(%rdx){1to4}, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x7f,0x5a,0x7f] +          vpermt2ps 508(%rdx){1to4}, %xmm23, %xmm19 + +// CHECK: vpermt2ps 512(%rdx){1to4}, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x7f,0x9a,0x00,0x02,0x00,0x00] +          vpermt2ps 512(%rdx){1to4}, %xmm23, %xmm19 + +// CHECK: vpermt2ps -512(%rdx){1to4}, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x7f,0x5a,0x80] +          vpermt2ps -512(%rdx){1to4}, %xmm23, %xmm19 + +// CHECK: vpermt2ps -516(%rdx){1to4}, %xmm23, %xmm19 +// CHECK:  encoding: [0x62,0xe2,0x45,0x10,0x7f,0x9a,0xfc,0xfd,0xff,0xff] +          vpermt2ps -516(%rdx){1to4}, %xmm23, %xmm19 + +// CHECK: vpermt2ps %ymm21, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x22,0x25,0x20,0x7f,0xd5] +          vpermt2ps %ymm21, %ymm27, %ymm26 + +// CHECK: vpermt2ps %ymm21, %ymm27, %ymm26 {%k3} +// CHECK:  encoding: [0x62,0x22,0x25,0x23,0x7f,0xd5] +          vpermt2ps %ymm21, %ymm27, %ymm26 {%k3} + +// CHECK: vpermt2ps %ymm21, %ymm27, %ymm26 {%k3} {z} +// CHECK:  encoding: [0x62,0x22,0x25,0xa3,0x7f,0xd5] +          vpermt2ps %ymm21, %ymm27, %ymm26 {%k3} {z} + +// CHECK: vpermt2ps (%rcx), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x20,0x7f,0x11] +          vpermt2ps (%rcx), %ymm27, %ymm26 + +// CHECK: vpermt2ps 291(%rax,%r14,8), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x22,0x25,0x20,0x7f,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermt2ps 291(%rax,%r14,8), %ymm27, %ymm26 + +// CHECK: vpermt2ps (%rcx){1to8}, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x30,0x7f,0x11] +          vpermt2ps (%rcx){1to8}, %ymm27, %ymm26 + +// CHECK: vpermt2ps 4064(%rdx), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x20,0x7f,0x52,0x7f] +          vpermt2ps 4064(%rdx), %ymm27, %ymm26 + +// CHECK: vpermt2ps 4096(%rdx), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x20,0x7f,0x92,0x00,0x10,0x00,0x00] +          vpermt2ps 4096(%rdx), %ymm27, %ymm26 + +// CHECK: vpermt2ps -4096(%rdx), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x20,0x7f,0x52,0x80] +          vpermt2ps -4096(%rdx), %ymm27, %ymm26 + +// CHECK: vpermt2ps -4128(%rdx), %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x20,0x7f,0x92,0xe0,0xef,0xff,0xff] +          vpermt2ps -4128(%rdx), %ymm27, %ymm26 + +// CHECK: vpermt2ps 508(%rdx){1to8}, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x30,0x7f,0x52,0x7f] +          vpermt2ps 508(%rdx){1to8}, %ymm27, %ymm26 + +// CHECK: vpermt2ps 512(%rdx){1to8}, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x30,0x7f,0x92,0x00,0x02,0x00,0x00] +          vpermt2ps 512(%rdx){1to8}, %ymm27, %ymm26 + +// CHECK: vpermt2ps -512(%rdx){1to8}, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x30,0x7f,0x52,0x80] +          vpermt2ps -512(%rdx){1to8}, %ymm27, %ymm26 + +// CHECK: vpermt2ps -516(%rdx){1to8}, %ymm27, %ymm26 +// CHECK:  encoding: [0x62,0x62,0x25,0x30,0x7f,0x92,0xfc,0xfd,0xff,0xff] +          vpermt2ps -516(%rdx){1to8}, %ymm27, %ymm26 + +// CHECK: vpermt2pd %xmm17, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x22,0xd5,0x00,0x7f,0xd1] +          vpermt2pd %xmm17, %xmm21, %xmm26 + +// CHECK: vpermt2pd %xmm17, %xmm21, %xmm26 {%k5} +// CHECK:  encoding: [0x62,0x22,0xd5,0x05,0x7f,0xd1] +          vpermt2pd %xmm17, %xmm21, %xmm26 {%k5} + +// CHECK: vpermt2pd %xmm17, %xmm21, %xmm26 {%k5} {z} +// CHECK:  encoding: [0x62,0x22,0xd5,0x85,0x7f,0xd1] +          vpermt2pd %xmm17, %xmm21, %xmm26 {%k5} {z} + +// CHECK: vpermt2pd (%rcx), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x7f,0x11] +          vpermt2pd (%rcx), %xmm21, %xmm26 + +// CHECK: vpermt2pd 291(%rax,%r14,8), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x22,0xd5,0x00,0x7f,0x94,0xf0,0x23,0x01,0x00,0x00] +          vpermt2pd 291(%rax,%r14,8), %xmm21, %xmm26 + +// CHECK: vpermt2pd (%rcx){1to2}, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x7f,0x11] +          vpermt2pd (%rcx){1to2}, %xmm21, %xmm26 + +// CHECK: vpermt2pd 2032(%rdx), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x7f,0x52,0x7f] +          vpermt2pd 2032(%rdx), %xmm21, %xmm26 + +// CHECK: vpermt2pd 2048(%rdx), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x7f,0x92,0x00,0x08,0x00,0x00] +          vpermt2pd 2048(%rdx), %xmm21, %xmm26 + +// CHECK: vpermt2pd -2048(%rdx), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x7f,0x52,0x80] +          vpermt2pd -2048(%rdx), %xmm21, %xmm26 + +// CHECK: vpermt2pd -2064(%rdx), %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x00,0x7f,0x92,0xf0,0xf7,0xff,0xff] +          vpermt2pd -2064(%rdx), %xmm21, %xmm26 + +// CHECK: vpermt2pd 1016(%rdx){1to2}, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x7f,0x52,0x7f] +          vpermt2pd 1016(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vpermt2pd 1024(%rdx){1to2}, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x7f,0x92,0x00,0x04,0x00,0x00] +          vpermt2pd 1024(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vpermt2pd -1024(%rdx){1to2}, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x7f,0x52,0x80] +          vpermt2pd -1024(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vpermt2pd -1032(%rdx){1to2}, %xmm21, %xmm26 +// CHECK:  encoding: [0x62,0x62,0xd5,0x10,0x7f,0x92,0xf8,0xfb,0xff,0xff] +          vpermt2pd -1032(%rdx){1to2}, %xmm21, %xmm26 + +// CHECK: vpermt2pd %ymm17, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xa2,0xc5,0x20,0x7f,0xc9] +          vpermt2pd %ymm17, %ymm23, %ymm17 + +// CHECK: vpermt2pd %ymm17, %ymm23, %ymm17 {%k1} +// CHECK:  encoding: [0x62,0xa2,0xc5,0x21,0x7f,0xc9] +          vpermt2pd %ymm17, %ymm23, %ymm17 {%k1} + +// CHECK: vpermt2pd %ymm17, %ymm23, %ymm17 {%k1} {z} +// CHECK:  encoding: [0x62,0xa2,0xc5,0xa1,0x7f,0xc9] +          vpermt2pd %ymm17, %ymm23, %ymm17 {%k1} {z} + +// CHECK: vpermt2pd (%rcx), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x7f,0x09] +          vpermt2pd (%rcx), %ymm23, %ymm17 + +// CHECK: vpermt2pd 291(%rax,%r14,8), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xa2,0xc5,0x20,0x7f,0x8c,0xf0,0x23,0x01,0x00,0x00] +          vpermt2pd 291(%rax,%r14,8), %ymm23, %ymm17 + +// CHECK: vpermt2pd (%rcx){1to4}, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x09] +          vpermt2pd (%rcx){1to4}, %ymm23, %ymm17 + +// CHECK: vpermt2pd 4064(%rdx), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x7f,0x4a,0x7f] +          vpermt2pd 4064(%rdx), %ymm23, %ymm17 + +// CHECK: vpermt2pd 4096(%rdx), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x7f,0x8a,0x00,0x10,0x00,0x00] +          vpermt2pd 4096(%rdx), %ymm23, %ymm17 + +// CHECK: vpermt2pd -4096(%rdx), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x7f,0x4a,0x80] +          vpermt2pd -4096(%rdx), %ymm23, %ymm17 + +// CHECK: vpermt2pd -4128(%rdx), %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x20,0x7f,0x8a,0xe0,0xef,0xff,0xff] +          vpermt2pd -4128(%rdx), %ymm23, %ymm17 + +// CHECK: vpermt2pd 1016(%rdx){1to4}, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x4a,0x7f] +          vpermt2pd 1016(%rdx){1to4}, %ymm23, %ymm17 + +// CHECK: vpermt2pd 1024(%rdx){1to4}, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x8a,0x00,0x04,0x00,0x00] +          vpermt2pd 1024(%rdx){1to4}, %ymm23, %ymm17 + +// CHECK: vpermt2pd -1024(%rdx){1to4}, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x4a,0x80] +          vpermt2pd -1024(%rdx){1to4}, %ymm23, %ymm17 + +// CHECK: vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17 +// CHECK:  encoding: [0x62,0xe2,0xc5,0x30,0x7f,0x8a,0xf8,0xfb,0xff,0xff] +          vpermt2pd -1032(%rdx){1to4}, %ymm23, %ymm17 | 

