diff options
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp      | 37
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h        |  4
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td        | 33
-rw-r--r--  llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  7
-rw-r--r--  llvm/lib/Target/X86/X86IntrinsicsInfo.h      | 74
5 files changed, 73 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5270e471959..09bd7bf274a 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4372,7 +4372,6 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::VPPERM: case X86ISD::VPERMV: case X86ISD::VPERMV3: - case X86ISD::VPERMIV3: case X86ISD::VZEXT_MOVL: return true; } @@ -4388,7 +4387,6 @@ static bool isTargetShuffleVariableMask(unsigned Opcode) { case X86ISD::VPPERM: case X86ISD::VPERMV: case X86ISD::VPERMV3: - case X86ISD::VPERMIV3: return true; // 'Faux' Target Shuffles. case ISD::AND: @@ -5977,21 +5975,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, } return false; } - case X86ISD::VPERMIV3: { - assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); - assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); - IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2); - // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one. 
- Ops.push_back(N->getOperand(1)); - Ops.push_back(N->getOperand(2)); - SDValue MaskNode = N->getOperand(0); - unsigned MaskEltSize = VT.getScalarSizeInBits(); - if (auto *C = getTargetConstantFromNode(MaskNode)) { - DecodeVPERMV3Mask(C, MaskEltSize, Mask); - break; - } - return false; - } default: llvm_unreachable("unknown target shuffle node"); } @@ -20540,9 +20523,9 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SDValue Src3 = Op.getOperand(3); SDValue Mask = Op.getOperand(4); MVT VT = Op.getSimpleValueType(); - SDValue PassThru = SDValue(); // set PassThru element + SDValue PassThru; if (IntrData->Type == VPERM_3OP_MASKZ) PassThru = getZeroVector(VT, Subtarget, DAG, dl); else @@ -20554,6 +20537,22 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Src2, Src1, Src3), Mask, PassThru, Subtarget, DAG); } + case VPERMI_3OP_MASK:{ + // Src2 is the PassThru + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Mask = Op.getOperand(4); + MVT VT = Op.getSimpleValueType(); + + // set PassThru element + SDValue PassThru = DAG.getBitcast(VT, Src2); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, + dl, Op.getValueType(), + Src1, Src2, Src3), + Mask, PassThru, Subtarget, DAG); + } case FMA_OP_MASK3: case FMA_OP_MASKZ: case FMA_OP_MASK: { @@ -25873,7 +25872,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VPERM2X128: return "X86ISD::VPERM2X128"; case X86ISD::VPERMV: return "X86ISD::VPERMV"; case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; - case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; @@ -38861,7 +38859,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::VPERMI: case X86ISD::VPERMV: case X86ISD::VPERMV3: - case X86ISD::VPERMIV3: case X86ISD::VPERMIL2: case 
X86ISD::VPERMILPI: case X86ISD::VPERMILPV: diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 58d0c9d92af..dd6be5b8e48 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -428,10 +428,6 @@ namespace llvm { // Res = VPERMV3 V0, MaskV, V1 VPERMV3, - // 3-op Variable Permute overwriting the index (VPERMI2). - // Res = VPERMIV3 V0, MaskV, V1 - VPERMIV3, - // Bitwise ternary logic. VPTERNLOG, // Fix Up Special Packed Float32/64 values. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 5fecf0a0a13..3bf097bd79c 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -334,6 +334,7 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _, // Similar to AVX512_maskable_3src but in this case the input VT for the tied // operand differs from the output VT. This requires a bitconvert on // the preserved vector going into the vselect. +// NOTE: The unmasked pattern is disabled. 
multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, X86VectorVTInfo InVT, dag Outs, dag NonTiedIns, string OpcodeStr, @@ -343,7 +344,7 @@ multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, !con((ins InVT.RC:$src1), NonTiedIns), !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), - OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, + OpcodeStr, AttSrcAsm, IntelSrcAsm, (null_frag), (vselect InVT.KRCWM:$mask, RHS, (bitconvert InVT.RC:$src1)), vselect, "", IsCommutable>; @@ -1719,17 +1720,19 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { -let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { +let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain, + hasSideEffects = 0 in { defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), (ins _.RC:$src2, _.RC:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, + (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, _.RC:$src3)), 1>, EVEX_4V, AVX5128IBase, Sched<[sched]>; + let mayLoad = 1 in defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), (ins _.RC:$src2, _.MemOp:$src3), OpcodeStr, "$src3, $src2", "$src2, $src3", - (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, + (_.VT (X86VPermt2 _.RC:$src2, IdxVT.RC:$src1, (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>, EVEX_4V, AVX5128IBase, Sched<[sched.Folded, ReadAfterLd]>; } @@ -1738,13 +1741,14 @@ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in { multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched, X86VectorVTInfo _, X86VectorVTInfo IdxVT> { - let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in + let Constraints = "$src1 = $dst", ExeDomain = 
_.ExeDomain, + hasSideEffects = 0, mayLoad = 1 in defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), (ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"), !strconcat("$src2, ${src3}", _.BroadcastStr ), - (_.VT (X86VPermi2X IdxVT.RC:$src1, - _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, + (_.VT (X86VPermt2 _.RC:$src2, + IdxVT.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))), 1>, AVX5128IBase, EVEX_4V, EVEX_B, Sched<[sched.Folded, ReadAfterLd]>; } @@ -1806,21 +1810,22 @@ multiclass avx512_perm_i_lowering<string InstrStr, X86VectorVTInfo _, X86VectorVTInfo IdxVT, X86VectorVTInfo CastVT> { def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - (_.VT _.RC:$src2), _.RC:$src3), + (X86VPermt2 (_.VT _.RC:$src2), + (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), _.RC:$src3), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rrk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, _.RC:$src3)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - _.RC:$src2, (_.LdFrag addr:$src3)), + (X86VPermt2 _.RC:$src2, + (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), + (_.LdFrag addr:$src3)), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rmk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86VPermi2X (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), - _.RC:$src2, - (X86VBroadcast (_.ScalarLdFrag addr:$src3))), + (X86VPermt2 _.RC:$src2, + (IdxVT.VT (bitconvert (CastVT.VT _.RC:$src1))), + (X86VBroadcast (_.ScalarLdFrag addr:$src3))), (_.VT (bitconvert (CastVT.VT _.RC:$src1))))), (!cast<Instruction>(InstrStr#"rmbk") _.RC:$src1, _.KRCWM:$mask, _.RC:$src2, addr:$src3)>; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 
cbe37b64b12..5a8ca994235 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -417,13 +417,6 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3", SDTCisSameSizeAs<0,2>, SDTCisSameAs<0,3>]>, []>; -def X86VPermi2X : SDNode<"X86ISD::VPERMIV3", - SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, - SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, - SDTCisSameSizeAs<0,1>, - SDTCisSameAs<0,2>, - SDTCisSameAs<0,3>]>, []>; - def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>; def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index d5263767db1..3e3a62ec82e 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -31,7 +31,7 @@ enum IntrinsicType : uint16_t { FMA_OP_MASK, FMA_OP_MASKZ, FMA_OP_MASK3, FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, IFMA_OP, - VPERM_2OP, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, + VPERM_2OP, VPERMI_3OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, @@ -1061,42 +1061,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - 
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK, - X86ISD::VPERMIV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_512, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_hi_512, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_pd_512, VPERMI_3OP_MASK, + 
X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_ps_512, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERMI_3OP_MASK, + X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK, |