diff options
| author | Adam Nemet <anemet@apple.com> | 2014-07-02 21:25:54 +0000 | 
|---|---|---|
| committer | Adam Nemet <anemet@apple.com> | 2014-07-02 21:25:54 +0000 | 
| commit | 2415a497b5e77b0cd6bce5c18d32914429038aa5 (patch) | |
| tree | 7816b727692bb79d9e46384846468f45d12d8526 /llvm/lib/Target/X86 | |
| parent | 100eb93f89cc8d9a177e0af7aca8e778a24d62de (diff) | |
| download | bcm5719-llvm-2415a497b5e77b0cd6bce5c18d32914429038aa5.tar.gz bcm5719-llvm-2415a497b5e77b0cd6bce5c18d32914429038aa5.zip  | |
[X86] AVX512: Add writemask variants for vperm*2*
This includes assembler and codegen support (see the new tests in
avx512-encodings.s and avx512-shuffle.ll).
<rdar://problem/17492620>
llvm-svn: 212221
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 82 | 
1 file changed, 68 insertions, 14 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 02df80e5666..daff30bea45 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -609,7 +609,7 @@ defm VPERMPDZ  : avx512_perm<0x16, "vpermpd", VR512,  memopv8f64, f512mem,  // -- VPERM2I - 3 source operands form --  multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC,                            PatFrag mem_frag, X86MemOperand x86memop, -                          SDNode OpNode, ValueType OpVT> { +                          SDNode OpNode, ValueType OpVT, RegisterClass KRC> {  let Constraints = "$src1 = $dst" in {    def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst),                     (ins RC:$src1, RC:$src2, RC:$src3), @@ -619,32 +619,86 @@ let Constraints = "$src1 = $dst" in {                       (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>,                      EVEX_4V; +  def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), +                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), +                   !strconcat(OpcodeStr, +                       " \t{$src3, $src2, $dst {${mask}}|" +                       "$dst {${mask}}, $src2, $src3}"), +                   [(set RC:$dst, (OpVT (vselect KRC:$mask, +                                           (OpNode RC:$src1, RC:$src2, +                                              RC:$src3), +                                           RC:$src1)))]>, +                    EVEX_4V, EVEX_K; + +  let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> +    def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), +                   (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), +                   !strconcat(OpcodeStr, +                       " \t{$src3, $src2, $dst {${mask}} {z} |", +                       "$dst {${mask}} {z}, $src2, $src3}"), +                   [(set RC:$dst, (OpVT (vselect KRC:$mask, +                                           (OpNode RC:$src1, 
RC:$src2, +                                              RC:$src3), +                                           (OpVT (bitconvert +                                              (v16i32 immAllZerosV))))))]>, +                    EVEX_4V, EVEX_KZ; +    def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst),                     (ins RC:$src1, RC:$src2, x86memop:$src3),                     !strconcat(OpcodeStr,                      " \t{$src3, $src2, $dst|$dst, $src2, $src3}"),                     [(set RC:$dst, -                     (OpVT (OpNode RC:$src1, RC:$src2,  +                     (OpVT (OpNode RC:$src1, RC:$src2,                        (mem_frag addr:$src3))))]>, EVEX_4V; + +  def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), +                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), +                   !strconcat(OpcodeStr, +                    " \t{$src3, $src2, $dst {${mask}}|" +                    "$dst {${mask}}, $src2, $src3}"), +                   [(set RC:$dst, +                       (OpVT (vselect KRC:$mask, +                                      (OpNode RC:$src1, RC:$src2, +                                         (mem_frag addr:$src3)), +                                      RC:$src1)))]>, +                    EVEX_4V, EVEX_K; + +  let AddedComplexity = 10 in // Prefer over the rrkz variant +    def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), +                   (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), +                   !strconcat(OpcodeStr, +                    " \t{$src3, $src2, $dst {${mask}} {z}|" +                    "$dst {${mask}} {z}, $src2, $src3}"), +                   [(set RC:$dst, +                     (OpVT (vselect KRC:$mask, +                                    (OpNode RC:$src1, RC:$src2, +                                            (mem_frag addr:$src3)), +                                    (OpVT (bitconvert +                                       (v16i32 immAllZerosV))))))]>, +                    
EVEX_4V, EVEX_KZ;    }  } -defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32, i512mem,  -                               X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64, i512mem,  -                               X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, memopv16f32, i512mem,  -                               X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, memopv8f64, i512mem,  -                               X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2D  : avx512_perm_3src<0x76, "vpermi2d",  VR512, memopv16i32, +                                  i512mem, X86VPermiv3, v16i32, VK16WM>, +                 EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q  : avx512_perm_3src<0x76, "vpermi2q",  VR512, memopv8i64, +                                  i512mem, X86VPermiv3, v8i64, VK8WM>, +                 EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps",  VR512, memopv16f32, +                                  i512mem, X86VPermiv3, v16f32, VK16WM>, +                 EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd",  VR512, memopv8f64, +                                  i512mem, X86VPermiv3, v8f64, VK8WM>, +                  EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;  defm VPERMT2D  : avx512_perm_3src<0x7E, "vpermt2d",  VR512, memopv16i32, i512mem,  -                               X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +                               X86VPermv3, v16i32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>;  defm VPERMT2Q  : avx512_perm_3src<0x7E, "vpermt2q",  VR512, memopv8i64, i512mem,  -                               X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +                               
X86VPermv3, v8i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;  defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps",  VR512, memopv16f32, i512mem,  -                               X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; +                               X86VPermv3, v16f32, VK16WM>, EVEX_V512, EVEX_CD8<32, CD8VF>;  defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd",  VR512, memopv8f64, i512mem,  -                               X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +                               X86VPermv3, v8f64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;  def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx),                     (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))),  | 

