diff options
author | Igor Breger <igor.breger@intel.com> | 2015-10-04 07:20:41 +0000 |
---|---|---|
committer | Igor Breger <igor.breger@intel.com> | 2015-10-04 07:20:41 +0000 |
commit | 78741a1b1e43d5489d863a82e9a116b6ef620643 (patch) | |
tree | 268160b26104cb73e183f943e86479b4263e10d7 /llvm/lib/Target | |
parent | 379539116e02acd098868a84f98fb81ee1561539 (diff) | |
download | bcm5719-llvm-78741a1b1e43d5489d863a82e9a116b6ef620643.tar.gz bcm5719-llvm-78741a1b1e43d5489d863a82e9a116b6ef620643.zip |
AVX512: Implemented encoding and intrinsics for VPERMILPS/PD instructions.
Added tests for intrinsics and encoding.
Differential Revision: http://reviews.llvm.org/D12690
llvm-svn: 249261
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 7 |
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 122 |
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 8 |
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 26 |
4 files changed, 102 insertions, 61 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b1e1dfa5f79..bb5c3770233 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16003,11 +16003,16 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget RoundingMode, Sae), Mask, Src0, Subtarget, DAG); } - case INTR_TYPE_2OP_MASK: { + case INTR_TYPE_2OP_MASK: + case INTR_TYPE_2OP_IMM8_MASK: { SDValue Src1 = Op.getOperand(1); SDValue Src2 = Op.getOperand(2); SDValue PassThru = Op.getOperand(3); SDValue Mask = Op.getOperand(4); + + if (IntrData->Type == INTR_TYPE_2OP_IMM8_MASK) + Src2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src2); + // We specify 2 possible opcodes for intrinsics with rounding modes. // First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 84d8a2bbc80..3c9bcf64848 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1109,62 +1109,6 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", } //===----------------------------------------------------------------------===// -// AVX-512 - VPERM -// -// -- immediate form -- -multiclass avx512_perm_imm<bits<8> opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _> { - let ExeDomain = _.ExeDomain in { - def ri : AVX512AIi8<opc, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst, - (_.VT (OpNode _.RC:$src1, (i8 imm:$src2))))]>, - EVEX; - def mi : AVX512AIi8<opc, MRMSrcMem, (outs _.RC:$dst), - (ins _.MemOp:$src1, u8imm:$src2), - !strconcat(OpcodeStr, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst, - (_.VT (OpNode (_.LdFrag addr:$src1), - (i8 imm:$src2))))]>, - EVEX, 
EVEX_CD8<_.EltSize, CD8VF>; -} -} - -multiclass avx512_permil<bits<8> OpcImm, bits<8> OpcVar, X86VectorVTInfo _, - X86VectorVTInfo Ctrl> : - avx512_perm_imm<OpcImm, "vpermil" # _.Suffix, X86VPermilpi, _> { - let ExeDomain = _.ExeDomain in { - def rr : AVX5128I<OpcVar, MRMSrcReg, (outs _.RC:$dst), - (ins _.RC:$src1, _.RC:$src2), - !strconcat("vpermil" # _.Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst, - (_.VT (X86VPermilpv _.RC:$src1, - (Ctrl.VT Ctrl.RC:$src2))))]>, - EVEX_4V; - def rm : AVX5128I<OpcVar, MRMSrcMem, (outs _.RC:$dst), - (ins _.RC:$src1, Ctrl.MemOp:$src2), - !strconcat("vpermil" # _.Suffix, - "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set _.RC:$dst, - (_.VT (X86VPermilpv _.RC:$src1, - (Ctrl.VT (Ctrl.LdFrag addr:$src2)))))]>, - EVEX_4V; - } -} -defm VPERMILPSZ : avx512_permil<0x04, 0x0C, v16f32_info, v16i32_info>, - EVEX_V512; -defm VPERMILPDZ : avx512_permil<0x05, 0x0D, v8f64_info, v8i64_info>, - EVEX_V512, VEX_W; - -def : Pat<(v16i32 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPSZri VR512:$src1, imm:$imm)>; -def : Pat<(v8i64 (X86VPermilpi VR512:$src1, (i8 imm:$imm))), - (VPERMILPDZri VR512:$src1, imm:$imm)>; - // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { @@ -4130,7 +4074,73 @@ defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq", defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd", X86VPermi, avx512vl_f64_info>, EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W; +//===----------------------------------------------------------------------===// +// AVX-512 - VPERMIL +//===----------------------------------------------------------------------===// + +multiclass avx512_permil_vec<bits<8> OpcVar, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _, X86VectorVTInfo Ctrl> { + defm rr: AVX512_maskable<OpcVar, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, Ctrl.RC:$src2), 
OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_.VT (OpNode _.RC:$src1, + (Ctrl.VT Ctrl.RC:$src2)))>, + T8PD, EVEX_4V; + let mayLoad = 1 in { + defm rm: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, Ctrl.MemOp:$src2), OpcodeStr, + "$src2, $src1", "$src1, $src2", + (_.VT (OpNode + _.RC:$src1, + (Ctrl.VT (bitconvert(Ctrl.LdFrag addr:$src2)))))>, + T8PD, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmb: AVX512_maskable<OpcVar, MRMSrcMem, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, + "${src2}"##_.BroadcastStr##", $src1", + "$src1, ${src2}"##_.BroadcastStr, + (_.VT (OpNode + _.RC:$src1, + (Ctrl.VT (X86VBroadcast + (Ctrl.ScalarLdFrag addr:$src2)))))>, + T8PD, EVEX_4V, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>; + }//let mayLoad = 1 +} + +multiclass avx512_permil_vec_common<string OpcodeStr, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + let Predicates = [HasAVX512] in { + defm Z : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info512, + Ctrl.info512>, EVEX_V512; + } + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info128, + Ctrl.info128>, EVEX_V128; + defm Z256 : avx512_permil_vec<OpcVar, OpcodeStr, X86VPermilpv, _.info256, + Ctrl.info256>, EVEX_V256; + } +} + +multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar, + AVX512VLVectorVTInfo _, AVX512VLVectorVTInfo Ctrl>{ + + defm NAME: avx512_permil_vec_common<OpcodeStr, OpcVar, _, Ctrl>; + defm NAME: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, OpcodeStr, + X86VPermilpi, _>, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; + + let isCodeGenOnly = 1 in { + // lowering implementation with the alternative types + defm NAME#_I: avx512_permil_vec_common<OpcodeStr, OpcVar, Ctrl, Ctrl>; + defm NAME#_I: avx512_shift_rmi_sizes<OpcImm, MRMSrcReg, MRMSrcMem, + OpcodeStr, X86VPermilpi, Ctrl>, + EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>; + 
} +} +defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info, + avx512vl_i32_info>; +defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info, + avx512vl_i64_info>, VEX_W; //===----------------------------------------------------------------------===// // AVX-512 - VPSHUFD, VPSHUFLW, VPSHUFHW //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index eec569d548a..f390d8fd34f 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -8087,17 +8087,19 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, (bitconvert (i_frag addr:$src2))))]>, VEX_4V, Sched<[WriteFShuffleLd, ReadAfterLd]>; - def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), + let Predicates = [HasAVX, NoVLX] in { + def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX, Sched<[WriteFShuffle]>; - def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), + def mi : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst), (ins x86memop_f:$src1, u8imm:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX, Sched<[WriteFShuffleLd]>; + }// Predicates = [HasAVX, NoVLX] } let ExeDomain = SSEPackedSingle in { @@ -8113,7 +8115,7 @@ let ExeDomain = SSEPackedDouble in { loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))), (VPERMILPSYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))), diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h 
b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 02f245d53d6..e9970d1db75 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -22,7 +22,7 @@ enum IntrinsicType { INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP, CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, VSHIFT_MASK, COMI, INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM, - INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, + INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK, INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK, FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, @@ -1407,6 +1407,30 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_128, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_256, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_pd_512, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_128, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_256, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermil_ps_512, INTR_TYPE_2OP_IMM8_MASK, + X86ISD::VPERMILPI, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), + 
X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK, + X86ISD::VPERMILPV, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, |