diff options
author | Craig Topper <craig.topper@intel.com> | 2018-01-28 00:56:30 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@intel.com> | 2018-01-28 00:56:30 +0000 |
commit | 15d69739e2859ac0da88cfd4cfe0dccdfbdb6202 (patch) | |
tree | 0d7333c6fc6dc914d79a5aacb178692b05738cc7 /llvm/lib | |
parent | 9945c44ea6edb9fd6a76ee6f4454f17aa109d2b2 (diff) | |
download | bcm5719-llvm-15d69739e2859ac0da88cfd4cfe0dccdfbdb6202.tar.gz bcm5719-llvm-15d69739e2859ac0da88cfd4cfe0dccdfbdb6202.zip |
[X86] Remove VPTESTM/VPTESTNM ISD opcodes. Use isel patterns matching cmpm eq/ne with immallzeros.
llvm-svn: 323612
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 57 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 99 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 |
5 files changed, 76 insertions, 93 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 66ba24cf10e..e325d975486 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -451,8 +451,7 @@ namespace { // type. static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { unsigned Opcode = N->getOpcode(); - if (Opcode == X86ISD::CMPM || Opcode == X86ISD::TESTM || - Opcode == X86ISD::TESTNM || Opcode == X86ISD::CMPMU || + if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMU || Opcode == X86ISD::CMPM_RND) { // We can get 256-bit 8 element types here without VLX being enabled. When // this happens we will use 512-bit operations and the mask will not be diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 776632551d5..4b1efab9430 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5043,8 +5043,6 @@ static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) { switch (Opcode) { default: return false; - case X86ISD::TESTM: - case X86ISD::TESTNM: case X86ISD::CMPM: case X86ISD::CMPMU: case X86ISD::CMPM_RND: @@ -14639,9 +14637,11 @@ SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { assert(Cond.getValueType().getScalarSizeInBits() == VT.getScalarSizeInBits() && "Should have a size-matched integer condition!"); - // Build a mask by testing the condition against itself (tests for zero). + // Build a mask by testing the condition against zero. MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue Mask = DAG.getNode(X86ISD::TESTM, dl, MaskVT, Cond, Cond); + SDValue Mask = DAG.getNode(X86ISD::CMPM, dl, MaskVT, Cond, + getZeroVector(VT, Subtarget, DAG, dl), + DAG.getConstant(4, dl, MVT::i8)); // Now return a new VSELECT using the mask. return DAG.getSelect(dl, VT, Mask, Op.getOperand(1), Op.getOperand(2)); } @@ -16609,7 +16609,9 @@ static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, In = DAG.getNode(ISD::SHL, DL, InVT, In, DAG.getConstant(ShiftInx, DL, InVT)); } - return DAG.getNode(X86ISD::TESTM, DL, VT, In, In); + return DAG.getNode(X86ISD::CMPM, DL, VT, In, + getZeroVector(InVT, Subtarget, DAG, DL), + DAG.getConstant(4, DL, MVT::i8)); } SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { @@ -17766,26 +17768,6 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { if (Swap) std::swap(Op0, Op1); - // See if it is the case of CMP(EQ|NEQ,AND(A,B),ZERO) and change it to TESTM|NM. - if (SSECC == 4 || SSECC == 0) { - SDValue A = peekThroughBitcasts(Op0); - if ((A.getOpcode() == ISD::AND || A.getOpcode() == X86ISD::FAND) && - ISD::isBuildVectorAllZeros(Op1.getNode())) { - MVT VT0 = Op0.getSimpleValueType(); - SDValue RHS = DAG.getBitcast(VT0, A.getOperand(0)); - SDValue LHS = DAG.getBitcast(VT0, A.getOperand(1)); - return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM, - dl, VT, RHS, LHS); - } - - // If this is just a comparison with 0 without an AND, we can just use - // the same input twice to avoid creating a zero vector. - if (ISD::isBuildVectorAllZeros(Op1.getNode())) { - return DAG.getNode(SSECC == 0 ? X86ISD::TESTNM : X86ISD::TESTM, - dl, VT, Op0, Op0); - } - } - unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode) ? X86ISD::CMPMU : X86ISD::CMPM; return DAG.getNode(Opc, dl, VT, Op0, Op1, @@ -25365,8 +25347,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVMSK: return "X86ISD::MOVMSK"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; - case X86ISD::TESTM: return "X86ISD::TESTM"; - case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; case X86ISD::KTEST: return "X86ISD::KTEST"; case X86ISD::KSHIFTL: return "X86ISD::KSHIFTL"; @@ -37674,28 +37654,6 @@ static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG, return SDValue(); } -static SDValue combineTestM(SDNode *N, SelectionDAG &DAG, - const X86Subtarget &Subtarget) { - SDValue Op0 = N->getOperand(0); - SDValue Op1 = N->getOperand(1); - - MVT VT = N->getSimpleValueType(0); - SDLoc DL(N); - - // TEST (AND a, b) ,(AND a, b) -> TEST a, b - if (Op0 == Op1 && Op1->getOpcode() == ISD::AND) - return DAG.getNode(X86ISD::TESTM, DL, VT, Op0->getOperand(0), - Op0->getOperand(1)); - - // TEST op0, BUILD_VECTOR(all_zero) -> BUILD_VECTOR(all_zero) - // TEST BUILD_VECTOR(all_zero), op1 -> BUILD_VECTOR(all_zero) - if (ISD::isBuildVectorAllZeros(Op0.getNode()) || - ISD::isBuildVectorAllZeros(Op1.getNode())) - return getZeroVector(VT, Subtarget, DAG, DL); - - return SDValue(); -} - static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, const X86Subtarget &Subtarget) { MVT VT = N->getSimpleValueType(0); @@ -38001,7 +37959,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::MSCATTER: case ISD::MGATHER: case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI, Subtarget); - case X86ISD::TESTM: return combineTestM(N, DAG, Subtarget); case X86ISD::PCMPEQ: case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index e63a4b810a1..ad166ff42da 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -368,10 +368,6 @@ namespace llvm { // Vector packed fp sign bitwise comparisons. TESTP, - // Vector "test" in AVX-512, the result is in a mask vector. - TESTM, - TESTNM, - // OR/AND test for masks. KORTEST, KTEST, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index e095011d875..0b8cebb2e8b 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2084,6 +2084,8 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr, def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 0))>; +def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2), + (X86cmpm node:$src1, node:$src2, (i8 4))>; def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2), (X86cmpm node:$src1, node:$src2, (i8 6))>; @@ -5197,42 +5199,57 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs // AVX-512 VPTESTM instructions //===----------------------------------------------------------------------===// -multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode, - OpndItins itins, X86VectorVTInfo _> { +multiclass avx512_vptest<bits<8> opc, string OpcodeStr, PatFrag OpNode, + OpndItins itins, X86VectorVTInfo _, string Suffix> { let ExeDomain = _.ExeDomain in { let isCommutable = 1 in defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)), itins.rr>, + (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), + _.ImmAllZerosV), itins.rr>, EVEX_4V, Sched<[itins.Sched]>; defm rm : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (OpNode (_.VT _.RC:$src1), - (_.VT (bitconvert (_.LdFrag addr:$src2)))), itins.rm>, - EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, + (OpNode (bitconvert + (_.i64VT (and _.RC:$src1, + (bitconvert (_.LdFrag addr:$src2))))), + _.ImmAllZerosV), + itins.rm>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } + + // Patterns for compare with 0 that just use the same source twice. + def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), + (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rr") + _.RC:$src, _.RC:$src))>; + + def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), + (_.KVT (!cast<Instruction>(NAME # Suffix # _.ZSuffix # "rrk") + _.KRC:$mask, _.RC:$src, _.RC:$src))>; } -multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, PatFrag OpNode, OpndItins itins, X86VectorVTInfo _> { let ExeDomain = _.ExeDomain in defm rmb : AVX512_maskable_cmp<opc, MRMSrcMem, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr, "${src2}"##_.BroadcastStr##", $src1", "$src1, ${src2}"##_.BroadcastStr, - (OpNode (_.VT _.RC:$src1), (_.VT (X86VBroadcast - (_.ScalarLdFrag addr:$src2)))), + (OpNode (and _.RC:$src1, + (X86VBroadcast + (_.ScalarLdFrag addr:$src2))), + _.ImmAllZerosV), itins.rm>, EVEX_B, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>, Sched<[itins.Sched.Folded, ReadAfterLd]>; } // Use 512bit version to implement 128/256 bit in case NoVLX. -multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo, +multiclass avx512_vptest_lowering<PatFrag OpNode, X86VectorVTInfo ExtendInfo, X86VectorVTInfo _, string Suffix> { - def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))), + def : Pat<(_.KVT (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), + _.ImmAllZerosV)), (_.KVT (COPY_TO_REGCLASS (!cast<Instruction>(NAME # Suffix # "Zrr") (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), @@ -5242,7 +5259,8 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo, _.KRC))>; def : Pat<(_.KVT (and _.KRC:$mask, - (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))), + (OpNode (bitconvert (_.i64VT (and _.RC:$src1, _.RC:$src2))), + _.ImmAllZerosV))), (COPY_TO_REGCLASS (!cast<Instruction>(NAME # Suffix # "Zrrk") (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), @@ -5251,19 +5269,38 @@ multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo, (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), _.RC:$src2, _.SubRegIdx)), _.KRC)>; + + def : Pat<(_.KVT (OpNode _.RC:$src, _.ImmAllZerosV)), + (_.KVT (COPY_TO_REGCLASS + (!cast<Instruction>(NAME # Suffix # "Zrr") + (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), + _.RC:$src, _.SubRegIdx), + (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), + _.RC:$src, _.SubRegIdx)), + _.KRC))>; + + def : Pat<(_.KVT (and _.KRC:$mask, (OpNode _.RC:$src, _.ImmAllZerosV))), + (COPY_TO_REGCLASS + (!cast<Instruction>(NAME # Suffix # "Zrrk") + (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC), + (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), + _.RC:$src, _.SubRegIdx), + (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)), + _.RC:$src, _.SubRegIdx)), + _.KRC)>; } -multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, PatFrag OpNode, OpndItins itins, AVX512VLVectorVTInfo _, string Suffix> { let Predicates = [HasAVX512] in - defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512>, + defm Z : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info512, Suffix>, avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256>, + defm Z256 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info256, Suffix>, avx512_vptest_mb<opc, OpcodeStr, OpNode,itins, _.info256>, EVEX_V256; - defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128>, + defm Z128 : avx512_vptest<opc, OpcodeStr, OpNode, itins, _.info128, Suffix>, avx512_vptest_mb<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { @@ -5272,7 +5309,7 @@ multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, } } -multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode, +multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, PatFrag OpNode, OpndItins itins> { defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", OpNode, itins, avx512vl_i32_info, "D">; @@ -5281,41 +5318,41 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr, SDNode OpNode, } multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr, - SDNode OpNode, OpndItins itins> { + PatFrag OpNode, OpndItins itins> { let Predicates = [HasBWI] in { - defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info>, + defm WZ: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v32i16_info, "W">, EVEX_V512, VEX_W; - defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info>, + defm BZ: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v64i8_info, "B">, EVEX_V512; } let Predicates = [HasVLX, HasBWI] in { - defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info>, + defm WZ256: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v16i16x_info, "W">, EVEX_V256, VEX_W; - defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info>, + defm WZ128: avx512_vptest<opc, OpcodeStr#"w", OpNode, itins, v8i16x_info, "W">, EVEX_V128, VEX_W; - defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info>, + defm BZ256: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v32i8x_info, "B">, EVEX_V256; - defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info>, + defm BZ128: avx512_vptest<opc, OpcodeStr#"b", OpNode, itins, v16i8x_info, "B">, EVEX_V128; } let Predicates = [HasAVX512, NoVLX] in { - defm BZ256_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v32i8x_info, "B">; - defm BZ128_Alt : avx512_vptest_lowering< OpNode, v64i8_info, v16i8x_info, "B">; - defm WZ256_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v16i16x_info, "W">; - defm WZ128_Alt : avx512_vptest_lowering< OpNode, v32i16_info, v8i16x_info, "W">; + defm BZ256_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v32i8x_info, "B">; + defm BZ128_Alt : avx512_vptest_lowering<OpNode, v64i8_info, v16i8x_info, "B">; + defm WZ256_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v16i16x_info, "W">; + defm WZ128_Alt : avx512_vptest_lowering<OpNode, v32i16_info, v8i16x_info, "W">; } } multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr, - SDNode OpNode, OpndItins itins> : + PatFrag OpNode, OpndItins itins> : avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>, avx512_vptest_dq<opc_dq, OpcodeStr, OpNode, itins>; -defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86testm, +defm VPTESTM : avx512_vptest_all_forms<0x26, 0x27, "vptestm", X86pcmpnem, SSE_BIT_ITINS_P>, T8PD; -defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm, +defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86pcmpeqm, SSE_BIT_ITINS_P>, T8XS; diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 49c63c39711..beb94552dbb 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -234,10 +234,6 @@ def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; -def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, - SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>; - def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>; @@ -248,8 +244,6 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; -def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>; -def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>; def X86movmsk : SDNode<"X86ISD::MOVMSK", SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVec<1>]>>; |