diff options
author | Roman Lebedev <lebedev.ri@gmail.com> | 2018-10-22 14:12:44 +0000 |
---|---|---|
committer | Roman Lebedev <lebedev.ri@gmail.com> | 2018-10-22 14:12:44 +0000 |
commit | 898808504dcc2eb64a2962e40bb3096203d12d6b (patch) | |
tree | a759bc1e652abc7b2d17e57e7b69b73ebbce3a75 /llvm/lib/Target | |
parent | 7efbd8daf4cbb911518a591186cc8fce87ab0c22 (diff) | |
download | bcm5719-llvm-898808504dcc2eb64a2962e40bb3096203d12d6b.tar.gz bcm5719-llvm-898808504dcc2eb64a2962e40bb3096203d12d6b.zip |
[X86] X86DAGToDAGISel: handle BZHI selection too, not just BEXTR.
Summary:
As discussed in D52304 / IRC, we now have pattern matching for
'bit extract' in two places - tablegen and `X86DAGToDAGISel`.
There are 4 patterns.
And we will have a problem with `x & (-1 >> (32 - y))` pattern.
* If the mask is one-use, then it is always unfolded into `x << (32 - y) >> (32 - y)` first.
Thus, the existing test coverage is already broken.
* If it is not one-use, then it is not unfolded, and is matched as BZHI.
* If it is not one-use, we will not match it as BEXTR. And if it is one-use, it will have been unfolded already.
So we will either not handle that pattern for BEXTR, or not have test coverage for it.
This is bad.
As discussed with @craig.topper, let's unify this matching, and do everything in `X86DAGToDAGISel`.
Then we will not have code duplication, and will have proper test coverage.
This indeed does not affect any tests, and this is great.
It means that for these two patterns, the `X86DAGToDAGISel` is identical to the tablegen version.
Please review carefully, i'm not fully sure about that intrinsic change, and introduction of the new `X86ISD` opcode.
Reviewers: craig.topper, RKSimon, spatel
Reviewed By: craig.topper
Subscribers: llvm-commits, craig.topper
Differential Revision: https://reviews.llvm.org/D53164
llvm-svn: 344904
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 40 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 18 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 2 |
5 files changed, 38 insertions, 26 deletions
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index b6503754497..d3aa5c89adc 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -460,7 +460,7 @@ namespace { bool foldLoadStoreIntoMemOperand(SDNode *Node); MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); - bool matchBEXTR(SDNode *Node); + bool matchBitExtract(SDNode *Node); bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; bool tryShiftAmountMod(SDNode *N); @@ -2681,15 +2681,15 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { return true; } -// See if this is an X & Mask that we can match to BEXTR. +// See if this is an X & Mask that we can match to BEXTR/BZHI. // Where Mask is one of the following patterns: // a) x & (1 << nbits) - 1 // b) x & ~(-1 << nbits) // c) x & (-1 >> (32 - y)) // d) x << (32 - y) >> (32 - y) -bool X86DAGToDAGISel::matchBEXTR(SDNode *Node) { - // BEXTR is BMI instruction. However, if we have BMI2, we prefer BZHI. - if (!Subtarget->hasBMI() || Subtarget->hasBMI2()) +bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { + // BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one. + if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) return false; MVT NVT = Node->getSimpleValueType(0); @@ -2700,17 +2700,24 @@ bool X86DAGToDAGISel::matchBEXTR(SDNode *Node) { SDValue NBits; + // If we have BMI2's BZHI, we are ok with muti-use patterns. + // Else, if we only have BMI1's BEXTR, we require one-use. + const bool CanHaveExtraUses = Subtarget->hasBMI2(); + auto checkOneUse = [CanHaveExtraUses](SDValue Op) { + return CanHaveExtraUses || Op.hasOneUse(); + }; + // a) x & ((1 << nbits) + (-1)) - auto matchPatternA = [&NBits](SDValue Mask) -> bool { + auto matchPatternA = [&checkOneUse, &NBits](SDValue Mask) -> bool { // Match `add`. Must only have one use! - if (Mask->getOpcode() != ISD::ADD || !Mask->hasOneUse()) + if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) return false; // We should be adding all-ones constant (i.e. subtracting one.) if (!isAllOnesConstant(Mask->getOperand(1))) return false; // Match `1 << nbits`. Must only have one use! SDValue M0 = Mask->getOperand(0); - if (M0->getOpcode() != ISD::SHL || !M0->hasOneUse()) + if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) return false; if (!isOneConstant(M0->getOperand(0))) return false; @@ -2719,13 +2726,13 @@ bool X86DAGToDAGISel::matchBEXTR(SDNode *Node) { }; // b) x & ~(-1 << nbits) - auto matchPatternB = [&NBits](SDValue Mask) -> bool { + auto matchPatternB = [&checkOneUse, &NBits](SDValue Mask) -> bool { // Match `~()`. Must only have one use! - if (!isBitwiseNot(Mask) || !Mask->hasOneUse()) + if (!isBitwiseNot(Mask) || !checkOneUse(Mask)) return false; // Match `-1 << nbits`. Must only have one use! SDValue M0 = Mask->getOperand(0); - if (M0->getOpcode() != ISD::SHL || !M0->hasOneUse()) + if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) return false; if (!isAllOnesConstant(M0->getOperand(0))) return false; @@ -2761,6 +2768,15 @@ bool X86DAGToDAGISel::matchBEXTR(SDNode *Node) { NBits = CurDAG->getTargetInsertSubreg(X86::sub_8bit, DL, NVT, ImplDef, NBits); insertDAGNode(*CurDAG, OrigNBits, NBits); + if (Subtarget->hasBMI2()) { + // Great, just emit the the BZHI.. + SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); + ReplaceNode(Node, Extract.getNode()); + SelectCode(Extract.getNode()); + return true; + } + + // Else, emitting BEXTR requires one more step. // The 'control' of BEXTR has the pattern of: // [15...8 bit][ 7...0 bit] location // [ bit count][ shift] name @@ -3168,7 +3184,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { CurDAG->RemoveDeadNode(Node); return; } - if (matchBEXTR(Node)) + if (matchBitExtract(Node)) return; if (AndImmShrink && shrinkAndImmediate(Node)) return; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ae5795db4ab..cdd76e1f03d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -26630,6 +26630,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; case X86ISD::BEXTR: return "X86ISD::BEXTR"; + case X86ISD::BZHI: return "X86ISD::BZHI"; case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::MOVMSK: return "X86ISD::MOVMSK"; case X86ISD::PTEST: return "X86ISD::PTEST"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 3e6c8929a9b..15321b12ff6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -355,6 +355,9 @@ namespace llvm { // Bit field extract. BEXTR, + // Zero High Bits Starting with Specified Bit Position. + BZHI, + // LOW, HI, FLAGS = umul LHS, RHS. UMUL, diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 15ed435244e..39c3bbfd90e 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -291,6 +291,8 @@ def X86lock_dec : SDNode<"X86ISD::LDEC", SDTLockUnaryArithWithFlags, def X86bextr : SDNode<"X86ISD::BEXTR", SDTIntBinOp>; +def X86bzhi : SDNode<"X86ISD::BZHI", SDTIntBinOp>; + def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDT_X86WIN_ALLOCA, @@ -2454,9 +2456,9 @@ multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC, let Predicates = [HasBMI2], Defs = [EFLAGS] in { defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem, - int_x86_bmi_bzhi_32, loadi32, WriteBZHI>; + X86bzhi, loadi32, WriteBZHI>; defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem, - int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W; + X86bzhi, loadi64, WriteBZHI>, VEX_W; } def CountTrailingOnes : SDNodeXForm<imm, [{ @@ -2512,18 +2514,6 @@ let Predicates = [HasBMI2] in { multiclass bmi_bzhi_patterns<RegisterClass RC, int bitwidth, ValueType VT, Instruction DstInst, X86MemOperand x86memop, Instruction DstMemInst> { - // x & ((1 << y) - 1) - defm : _bmi_bzhi_pattern<(and RC:$src, (add (shl 1, GR8:$lz), -1)), - (and (x86memop addr:$src), - (add (shl 1, GR8:$lz), -1)), - RC, VT, DstInst, DstMemInst>; - - // x & ~(-1 << y) - defm : _bmi_bzhi_pattern<(and RC:$src, (xor (shl -1, GR8:$lz), -1)), - (and (x86memop addr:$src), - (xor (shl -1, GR8:$lz), -1)), - RC, VT, DstInst, DstMemInst>; - // x & (-1 >> (bitwidth - y)) defm : _bmi_bzhi_pattern<(and RC:$src, (srl -1, (sub bitwidth, GR8:$lz))), (and (x86memop addr:$src), diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 84c7878de61..252d64808f0 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1120,6 +1120,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_vpshrd_w_512, INTR_TYPE_3OP_IMM8, X86ISD::VSHRD, 0), X86_INTRINSIC_DATA(bmi_bextr_32, INTR_TYPE_2OP, X86ISD::BEXTR, 0), X86_INTRINSIC_DATA(bmi_bextr_64, INTR_TYPE_2OP, X86ISD::BEXTR, 0), + X86_INTRINSIC_DATA(bmi_bzhi_32, INTR_TYPE_2OP, X86ISD::BZHI, 0), + X86_INTRINSIC_DATA(bmi_bzhi_64, INTR_TYPE_2OP, X86ISD::BZHI, 0), X86_INTRINSIC_DATA(sse_cmp_ps, INTR_TYPE_3OP, X86ISD::CMPP, 0), X86_INTRINSIC_DATA(sse_comieq_ss, COMI, X86ISD::COMI, ISD::SETEQ), X86_INTRINSIC_DATA(sse_comige_ss, COMI, X86ISD::COMI, ISD::SETGE), |