summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorIgor Breger <igor.breger@intel.com>2016-03-03 14:18:38 +0000
committerIgor Breger <igor.breger@intel.com>2016-03-03 14:18:38 +0000
commit639fde79b0ecf8d7de2701fa75fe323e6b460c6c (patch)
tree438b176cad0aa8625a0af98d208747b117427171 /llvm
parentf824ced6a1f3e56c9388d03d386cba0114e439ee (diff)
downloadbcm5719-llvm-639fde79b0ecf8d7de2701fa75fe323e6b460c6c.tar.gz
bcm5719-llvm-639fde79b0ecf8d7de2701fa75fe323e6b460c6c.zip
AVX512: Combine AND + TESTM instructions.
Differential Revision: http://reviews.llvm.org/D17844 llvm-svn: 262621
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp16
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td1
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td16
-rw-r--r--llvm/test/CodeGen/X86/combine-testm-and.ll61
4 files changed, 86 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 692633c494e..6a51c3aff96 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29013,6 +29013,21 @@ static SDValue combineLockSub(SDNode *N, SelectionDAG &DAG,
{Chain, LHS, RHS}, VT, MMO);
}
+// Fold TESTM (AND a, b), (AND a, b) -> TESTM a, b; else return empty SDValue.
+static SDValue PerformTESTM(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ if (Op0 != Op1 || Op1->getOpcode() != ISD::AND) // Need the same AND twice.
+ return SDValue(); // Pattern not matched: produce no replacement.
+
+ EVT VT = N->getValueType(0); // Result type of the original TESTM is reused.
+ SDLoc DL(N);
+
+ return DAG.getNode(X86ISD::TESTM, DL, VT, // Test the AND's inputs directly.
+ Op0->getOperand(0), Op0->getOperand(1));
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -29086,6 +29101,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
case X86ISD::LSUB: return combineLockSub(N, DAG, Subtarget);
+ case X86ISD::TESTM: return PerformTESTM(N, DAG);
}
return SDValue();
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 1c380f3f4c1..e4791dec1ed 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3864,6 +3864,7 @@ defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD;
multiclass avx512_vptest<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let isCommutable = 1 in
defm rr : AVX512_maskable_cmp<opc, MRMSrcReg, _, (outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5363649518b..260e846c7c7 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -254,6 +254,11 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
SDTCisVec<1>,
SDTCisSameAs<2, 1>]>;
+
+def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
+ SDTCisSameNumEltsAs<0, 1>]>;
+
def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
@@ -264,14 +269,9 @@ def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
def X86ktest : SDNode<"X86ISD::KTEST", SDTX86CmpPTest>;
-def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisVec<1>, SDTCisSameAs<2, 1>,
- SDTCVecEltisVT<0, i1>,
- SDTCisSameNumEltsAs<0, 1>]>>;
-def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>,
- SDTCisVec<1>, SDTCisSameAs<2, 1>,
- SDTCVecEltisVT<0, i1>,
- SDTCisSameNumEltsAs<0, 1>]>>;
+def X86testm : SDNode<"X86ISD::TESTM", SDTX86Testm, [SDNPCommutative]>;
+def X86testnm : SDNode<"X86ISD::TESTNM", SDTX86Testm, [SDNPCommutative]>;
+
def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>;
def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
diff --git a/llvm/test/CodeGen/X86/combine-testm-and.ll b/llvm/test/CodeGen/X86/combine-testm-and.ll
new file mode 100644
index 00000000000..76f542f8445
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-testm-and.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;RUN: llc -mtriple=x86_64-apple-darwin -mcpu=skx < %s | FileCheck %s
+
+define i32 @combineTESTM_AND_1(<8 x i64> %a, <8 x i64> %b) {
+; CHECK-LABEL: combineTESTM_AND_1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %and.i = and <8 x i64> %b, %a
+ %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 -1)
+ %conv = zext i8 %test.i to i32
+ ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_2(<8 x i64> %a, <8 x i64> %b , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %and.i = and <8 x i64> %b, %a
+ %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+ %conv = zext i8 %test.i to i32
+ ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_mask_3(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_mask_3:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %b = load <8 x i64>, <8 x i64>* %bptr
+ %and.i = and <8 x i64> %a, %b
+ %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+ %conv = zext i8 %test.i to i32
+ ret i32 %conv
+}
+
+define i32 @combineTESTM_AND_mask_4(<8 x i64> %a, <8 x i64>* %bptr , i8 %mask) {
+; CHECK-LABEL: combineTESTM_AND_mask_4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %esi, %k1
+; CHECK-NEXT: vptestmq (%rdi), %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovb %k0, %eax
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: retq
+ %b = load <8 x i64>, <8 x i64>* %bptr
+ %and.i = and <8 x i64> %b, %a
+ %test.i = tail call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %and.i, <8 x i64> %and.i, i8 %mask)
+ %conv = zext i8 %test.i to i32
+ ret i32 %conv
+}
+
+declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
OpenPOWER on IntegriCloud