summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-02-19 19:23:31 +0000
committerCraig Topper <craig.topper@intel.com>2018-02-19 19:23:31 +0000
commit9471a7c89855ad591ce9178ead5fa59b92de9e45 (patch)
tree162dad66b674fa649a5494e44406c594f955a5ed /llvm/lib/Target
parent545d34a2724ef158d79c5e37dc817ceea9fbd1d8 (diff)
downloadbcm5719-llvm-9471a7c89855ad591ce9178ead5fa59b92de9e45.tar.gz
bcm5719-llvm-9471a7c89855ad591ce9178ead5fa59b92de9e45.zip
[X86] Reduce the number of isel pattern variations needed for VPTESTM/VPTESTNM matching.
Canonicalize EQ/NE PCMPM to have build vector all zeros on the RHS so we don't have to pattern match it in both locations. This significantly reduces the number of isel patterns needed since we also had to multiply it out with loads being in either operand of the 'and' input node and in the 'and' masking node. This removes over 24000 bytes from the isel table. llvm-svn: 325526
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp8
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td40
2 files changed, 32 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9f483b72a89..2f2ae32a1d1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17830,6 +17830,14 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
"Cannot set masked compare for this operation");
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+
+ // If this is a seteq make sure any build vectors of all zeros are on the RHS.
+ // This helps with vptestm matching.
+ // TODO: Should we just canonicalize the setcc during DAG combine?
+ if ((SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE) &&
+ ISD::isBuildVectorAllZeros(Op0.getNode()))
+ std::swap(Op0, Op1);
+
bool Swap = false;
unsigned SSECC;
switch (SetCCOpcode) {
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 395ab8a3c8a..23432072e98 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2189,27 +2189,27 @@ multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
}
}
-def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
-def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
- (X86cmpm_c node:$src1, node:$src2, (i8 4))>;
+// This fragment treats X86cmpm as commutable to help match loads in both
+// operands for PCMPEQ.
+def X86pcmpeqm_c : PatFrag<(ops node:$src1, node:$src2),
+ (X86cmpm_c node:$src1, node:$src2, (i8 0))>;
def X86pcmpgtm : PatFrag<(ops node:$src1, node:$src2),
(X86cmpm node:$src1, node:$src2, (i8 6))>;
// FIXME: Is there a better scheduler itinerary for VPCMP?
-defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
+defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm_c,
SSE_ALU_F32P, avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>, VEX_WIG;
-defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
+defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm_c,
SSE_ALU_F32P, avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>, VEX_WIG;
-defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
+defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm_c,
SSE_ALU_F32P, avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
-defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
+defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm_c,
SSE_ALU_F32P, avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -3111,16 +3111,16 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
let Predicates = [HasAVX512, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v8i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD", v8i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v8i32x_info, v16i32_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTD", v4i32x_info, v16i32_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQD", v4i32x_info, v16i32_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQD", v4i32x_info, v16i32_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v4i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ", v4i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v4i64x_info, v8i64_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTQ", v2i64x_info, v8i64_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQQ", v2i64x_info, v8i64_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQQ", v2i64x_info, v8i64_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VCMPPS", v8f32x_info, v16f32_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPD", v8i32x_info, v16i32_info>;
@@ -3141,16 +3141,16 @@ let Predicates = [HasAVX512, NoVLX] in {
let Predicates = [HasBWI, NoVLX] in {
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v32i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB", v32i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTB", v16i8x_info, v64i8_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQB", v16i8x_info, v64i8_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQB", v16i8x_info, v64i8_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v16i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW", v16i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v16i16x_info, v32i16_info>;
defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpgtm, "VPCMPGTW", v8i16x_info, v32i16_info>;
- defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm, "VPCMPEQW", v8i16x_info, v32i16_info>;
+ defm : axv512_icmp_packed_no_vlx_lowering<X86pcmpeqm_c, "VPCMPEQW", v8i16x_info, v32i16_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpm, "VPCMPB", v32i8x_info, v64i8_info>;
defm : axv512_icmp_packed_cc_no_vlx_lowering<X86cmpmu, "VPCMPUB", v32i8x_info, v64i8_info>;
@@ -5465,6 +5465,14 @@ multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
}
}
+// These patterns are used to match vptestm/vptestnm. We don't treat pcmpeqm
+// as commutable here because we already canonicalized all zeros vectors to the
+// RHS during lowering.
+def X86pcmpeqm : PatFrag<(ops node:$src1, node:$src2),
+ (X86cmpm node:$src1, node:$src2, (i8 0))>;
+def X86pcmpnem : PatFrag<(ops node:$src1, node:$src2),
+ (X86cmpm node:$src1, node:$src2, (i8 4))>;
+
multiclass avx512_vptest_all_forms<bits<8> opc_wb, bits<8> opc_dq, string OpcodeStr,
PatFrag OpNode, OpndItins itins> :
avx512_vptest_wb <opc_wb, OpcodeStr, OpNode, itins>,
OpenPOWER on IntegriCloud