Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 66
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 66
-rw-r--r--  llvm/test/CodeGen/X86/tbm_patterns.ll   |  6
3 files changed, 68 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index be079659da4..25a8567a9c1 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -457,7 +457,6 @@ namespace {
    }

    bool foldLoadStoreIntoMemOperand(SDNode *Node);
-    bool matchBEXTRFromAnd(SDNode *Node);
    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
@@ -2582,69 +2581,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  return true;
 }

-// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
-  MVT NVT = Node->getSimpleValueType(0);
-  SDLoc dl(Node);
-
-  SDValue N0 = Node->getOperand(0);
-  SDValue N1 = Node->getOperand(1);
-
-  // If we have TBM we can use an immediate for the control. If we have BMI
-  // we should only do this if the BEXTR instruction is implemented well.
-  // Otherwise moving the control into a register makes this more costly.
-  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
-  // hoisting the move immediate would make it worthwhile with a less optimal
-  // BEXTR?
-  if (!Subtarget->hasTBM() &&
-      !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
-    return false;
-
-  // Must have a shift right.
-  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
-    return false;
-
-  // Shift can't have additional users.
-  if (!N0->hasOneUse())
-    return false;
-
-  // Only supported for 32 and 64 bits.
-  if (NVT != MVT::i32 && NVT != MVT::i64)
-    return false;
-
-  // Shift amount and RHS of and must be constant.
-  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
-  if (!MaskCst || !ShiftCst)
-    return false;
-
-  // And RHS must be a mask.
-  uint64_t Mask = MaskCst->getZExtValue();
-  if (!isMask_64(Mask))
-    return false;
-
-  uint64_t Shift = ShiftCst->getZExtValue();
-  uint64_t MaskSize = countPopulation(Mask);
-
-  // Don't interfere with something that can be handled by extracting AH.
-  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
-  if (Shift == 8 && MaskSize == 8)
-    return false;
-
-  // Make sure we are only using bits that were in the original value, not
-  // shifted in.
-  if (Shift + MaskSize > NVT.getSizeInBits())
-    return false;
-
-  // Create a BEXTR node and run it through selection.
-  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
-  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
-                                N0->getOperand(0), C);
-  ReplaceNode(Node, New.getNode());
-  SelectCode(New.getNode());
-  return true;
-}
-
 // Emit a PCMISTR(I/M) instruction.
 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                              bool MayFoldLoad, const SDLoc &dl,
@@ -2952,8 +2888,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
    break;

  case ISD::AND:
-    if (matchBEXTRFromAnd(Node))
-      return;
    if (AndImmShrink && shrinkAndImmediate(Node))
      return;

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67f98d8ee72..ab9a14a65a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35278,6 +35278,69 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
  return SDValue();
 }

+static bool hasBEXTR(const X86Subtarget &Subtarget, EVT VT) {
+  // If we have TBM we can use an immediate for the control. If we have BMI
+  // we should only do this if the BEXTR instruction is implemented well.
+  // Otherwise moving the control into a register makes this more costly.
+  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+  // hoisting the move immediate would make it worthwhile with a less optimal
+  // BEXTR?
+  if (!Subtarget.hasTBM() && !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+    return false;
+  return (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
+}
+
+// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
+static SDValue combineAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  EVT NVT = Node->getValueType(0);
+  SDLoc dl(Node);
+
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  // Check if subtarget has BEXTR instruction for the node's type
+  if (!hasBEXTR(Subtarget, NVT))
+    return SDValue();
+
+  // Must have a shift right.
+  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
+    return SDValue();
+
+  // Shift can't have additional users.
+  if (!N0->hasOneUse())
+    return SDValue();
+
+  // Shift amount and RHS of and must be constant.
+  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  if (!MaskCst || !ShiftCst)
+    return SDValue();
+
+  // And RHS must be a mask.
+  uint64_t Mask = MaskCst->getZExtValue();
+  if (!isMask_64(Mask))
+    return SDValue();
+
+  uint64_t Shift = ShiftCst->getZExtValue();
+  uint64_t MaskSize = countPopulation(Mask);
+
+  // Don't interfere with something that can be handled by extracting AH.
+  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
+  if (Shift == 8 && MaskSize == 8)
+    return SDValue();
+
+  // Make sure we are only using bits that were in the original value, not
+  // shifted in.
+  if (Shift + MaskSize > NVT.getSizeInBits())
+    return SDValue();
+
+  // Create a BEXTR node.
+  SDValue C = DAG.getConstant(Shift | (MaskSize << 8), dl, NVT);
+  SDValue New = DAG.getNode(X86ISD::BEXTR, dl, NVT, N0->getOperand(0), C);
+  return New;
+}
+
 // Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
 // Turn it into series of XORs and a setnp.
 static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
@@ -35379,6 +35442,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

+  if (SDValue R = combineAndIntoBEXTR(N, DAG, Subtarget))
+    return R;
+
  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll
index 6865cc5a0ef..2b335ea4268 100644
--- a/llvm/test/CodeGen/X86/tbm_patterns.ll
+++ b/llvm/test/CodeGen/X86/tbm_patterns.ll
@@ -53,8 +53,7 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
  %t0 = lshr i32 %a, 4
@@ -114,8 +113,7 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
  %t0 = lshr i64 %a, 4
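Both the removed isel matcher and the new DAG combine build the BEXTR control word the same way: the start bit goes in bits 7:0 and the field width in bits 15:8, i.e. Shift | (MaskSize << 8). As a worked check against the updated tests: (a >> 4) & 0xFFF gives Shift = 4 and MaskSize = 12, so the control is 4 | (12 << 8) = 3076 = 0xC04, the exact immediate in the new bextrl $3076 lines. The self-contained sketch below is not part of the patch; bextrControl and bextrRef are hypothetical names used only to illustrate the encoding and BEXTR's extract semantics.

#include <cassert>
#include <cstdint>

// Pack a BEXTR control word: start bit in bits 7:0, length in bits 15:8.
// This mirrors the Shift | (MaskSize << 8) expression in the patch.
static uint64_t bextrControl(uint64_t Shift, uint64_t MaskSize) {
  return Shift | (MaskSize << 8);
}

// Reference semantics of BEXTR: extract Len bits of Src starting at Start,
// zero-filling everything above the extracted field.
static uint64_t bextrRef(uint64_t Src, uint64_t Control) {
  uint64_t Start = Control & 0xFF;
  uint64_t Len = (Control >> 8) & 0xFF;
  if (Start >= 64 || Len == 0)
    return 0;
  uint64_t Shifted = Src >> Start;
  return Len >= 64 ? Shifted : Shifted & ((1ULL << Len) - 1);
}

int main() {
  // (a >> 4) & 0xFFF from the tests: Shift = 4, MaskSize = 12.
  assert(bextrControl(4, 12) == 0xC04); // the bextrl $3076 immediate
  uint64_t A = 0x12345678;
  assert(bextrRef(A, bextrControl(4, 12)) == ((A >> 4) & 0xFFF));
  return 0;
}

Because BEXTR zero-fills above the extracted field, the Shift + MaskSize > NVT.getSizeInBits() bailout matters chiefly for the SRA case, where the shifted-in sign bits would differ from the zeros BEXTR produces.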

