diff options
author | Igor Breger <igor.breger@intel.com> | 2016-03-03 14:18:38 +0000 |
---|---|---|
committer | Igor Breger <igor.breger@intel.com> | 2016-03-03 14:18:38 +0000 |
commit | 639fde79b0ecf8d7de2701fa75fe323e6b460c6c (patch) | |
tree | 438b176cad0aa8625a0af98d208747b117427171 /llvm | |
parent | f824ced6a1f3e56c9388d03d386cba0114e439ee (diff) | |
download | bcm5719-llvm-639fde79b0ecf8d7de2701fa75fe323e6b460c6c.tar.gz bcm5719-llvm-639fde79b0ecf8d7de2701fa75fe323e6b460c6c.zip |
AVX512: Combine AND + TESTM instructions.
Differential Revision: http://reviews.llvm.org/D17844
llvm-svn: 262621
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 16 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/combine-testm-and.ll | 61 |
4 files changed, 86 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 692633c494e..6a51c3aff96 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -29013,6 +29013,21 @@ static SDValue combineLockSub(SDNode *N, SelectionDAG &DAG, {Chain, LHS, RHS}, VT, MMO); } +// TEST (AND a, b) ,(AND a, b) -> TEST a, b +static SDValue PerformTESTM(SDNode *N, SelectionDAG &DAG) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + + if (Op0 != Op1 || Op1->getOpcode() != ISD::AND) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + return DAG.getNode(X86ISD::TESTM, DL, VT, + Op0->getOperand(0), Op0->getOperand(1)); +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -29086,6 +29101,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::MGATHER: case ISD::MSCATTER: return combineGatherScatter(N, DAG); case X86ISD::LSUB: return combineLockSub(N, DAG, Subtarget); + case X86ISD::TESTM: return PerformTESTM(N, DAG); } return SDValue(); diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1c380f3f4c1..e4791dec1ed 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3864,6 +3864,7 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> { + let isCommutable = 1 in defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 5363649518b..260e846c7c7 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -254,6 +254,11 @@ def X86vpcomu : 
SDNode<"X86ISD::VPCOMU", def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; + +def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, + SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>, + SDTCisSameNumEltsAs<0, 1>]>; + def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>; def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>; def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>; @@ -264,14 +269,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>; -def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisVec<1>, SDTCisSameAs<2, 1>, - SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>>; -def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>, - SDTCisVec<1>, SDTCisSameAs<2, 1>, - SDTCVecEltisVT<0, i1>, - SDTCisSameNumEltsAs<0, 1>]>>; +def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>; +def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>; + def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; def X86pmuludq : SDNode<"X86ISD::PMULUDQ", diff --git a/llvm/test/CodeGen/X86/combine-testm-and.ll b/llvm/test/CodeGen/X86/combine-testm-and.ll new file mode 100644 index 00000000000..76f542f8445 --- /dev/null +++ b/llvm/test/CodeGen/X86/combine-testm-and.ll @@ -0,0 +1,61 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s + +define i32 @combineTESTM_AND_1(<8 x i64> %a, <8 x i64> %b) { +; CHECK-LABEL: combineTESTM_AND_1: +; CHECK: ## BB#0: +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x 
i64> %and.i, i8 -1) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_2: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_mask_3: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %b = load <8 x i64>, <8 x i64>* %bptr + %and.i = and <8 x i64> %a, %b + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) { +; CHECK-LABEL: combineTESTM_AND_mask_4: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %esi, %k1 +; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1} +; CHECK-NEXT: kmovb %k0, %eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: retq + %b = load <8 x i64>, <8 x i64>* %bptr + %and.i = and <8 x i64> %b, %a + %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask) + %conv = zext i8 %test.i to i32 + ret i32 %conv +} + +declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8) |