Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 66
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 66
-rw-r--r--  llvm/test/CodeGen/X86/tbm_patterns.ll   |  6
3 files changed, 68 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index be079659da4..25a8567a9c1 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -457,7 +457,6 @@ namespace {
    }

    bool foldLoadStoreIntoMemOperand(SDNode *Node);
-    bool matchBEXTRFromAnd(SDNode *Node);
    bool shrinkAndImmediate(SDNode *N);
    bool isMaskZeroExtended(SDNode *N) const;
    bool tryShiftAmountMod(SDNode *N);
@@ -2582,69 +2581,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
  return true;
 }

-// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
-bool X86DAGToDAGISel::matchBEXTRFromAnd(SDNode *Node) {
-  MVT NVT = Node->getSimpleValueType(0);
-  SDLoc dl(Node);
-
-  SDValue N0 = Node->getOperand(0);
-  SDValue N1 = Node->getOperand(1);
-
-  // If we have TBM we can use an immediate for the control. If we have BMI
-  // we should only do this if the BEXTR instruction is implemented well.
-  // Otherwise moving the control into a register makes this more costly.
-  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
-  // hoisting the move immediate would make it worthwhile with a less optimal
-  // BEXTR?
-  if (!Subtarget->hasTBM() &&
-      !(Subtarget->hasBMI() && Subtarget->hasFastBEXTR()))
-    return false;
-
-  // Must have a shift right.
-  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
-    return false;
-
-  // Shift can't have additional users.
-  if (!N0->hasOneUse())
-    return false;
-
-  // Only supported for 32 and 64 bits.
-  if (NVT != MVT::i32 && NVT != MVT::i64)
-    return false;
-
-  // Shift amount and RHS of and must be constant.
-  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
-  if (!MaskCst || !ShiftCst)
-    return false;
-
-  // And RHS must be a mask.
-  uint64_t Mask = MaskCst->getZExtValue();
-  if (!isMask_64(Mask))
-    return false;
-
-  uint64_t Shift = ShiftCst->getZExtValue();
-  uint64_t MaskSize = countPopulation(Mask);
-
-  // Don't interfere with something that can be handled by extracting AH.
-  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
-  if (Shift == 8 && MaskSize == 8)
-    return false;
-
-  // Make sure we are only using bits that were in the original value, not
-  // shifted in.
-  if (Shift + MaskSize > NVT.getSizeInBits())
-    return false;
-
-  // Create a BEXTR node and run it through selection.
-  SDValue C = CurDAG->getConstant(Shift | (MaskSize << 8), dl, NVT);
-  SDValue New = CurDAG->getNode(X86ISD::BEXTR, dl, NVT,
-                                N0->getOperand(0), C);
-  ReplaceNode(Node, New.getNode());
-  SelectCode(New.getNode());
-  return true;
-}
-
 // Emit a PCMISTR(I/M) instruction.
 MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
                                              bool MayFoldLoad, const SDLoc &dl,
@@ -2952,8 +2888,6 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
    break;

  case ISD::AND:
-    if (matchBEXTRFromAnd(Node))
-      return;
    if (AndImmShrink && shrinkAndImmediate(Node))
      return;

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 67f98d8ee72..ab9a14a65a1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35278,6 +35278,69 @@ static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
  return SDValue();
 }

+static bool hasBEXTR(const X86Subtarget &Subtarget, EVT VT) {
+  // If we have TBM we can use an immediate for the control. If we have BMI
+  // we should only do this if the BEXTR instruction is implemented well.
+  // Otherwise moving the control into a register makes this more costly.
+  // TODO: Maybe load folding, greater than 32-bit masks, or a guarantee of LICM
+  // hoisting the move immediate would make it worthwhile with a less optimal
+  // BEXTR?
+  if (!Subtarget.hasTBM() && !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR()))
+    return false;
+  return (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
+}
+
+// See if this is an (X >> C1) & C2 that we can match to BEXTR/BEXTRI.
+static SDValue combineAndIntoBEXTR(SDNode *Node, SelectionDAG &DAG,
+                                   const X86Subtarget &Subtarget) {
+  EVT NVT = Node->getValueType(0);
+  SDLoc dl(Node);
+
+  SDValue N0 = Node->getOperand(0);
+  SDValue N1 = Node->getOperand(1);
+
+  // Check if subtarget has BEXTR instruction for the node's type
+  if (!hasBEXTR(Subtarget, NVT))
+    return SDValue();
+
+  // Must have a shift right.
+  if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA)
+    return SDValue();
+
+  // Shift can't have additional users.
+  if (!N0->hasOneUse())
+    return SDValue();
+
+  // Shift amount and RHS of and must be constant.
+  ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+  if (!MaskCst || !ShiftCst)
+    return SDValue();
+
+  // And RHS must be a mask.
+  uint64_t Mask = MaskCst->getZExtValue();
+  if (!isMask_64(Mask))
+    return SDValue();
+
+  uint64_t Shift = ShiftCst->getZExtValue();
+  uint64_t MaskSize = countPopulation(Mask);
+
+  // Don't interfere with something that can be handled by extracting AH.
+  // TODO: If we are able to fold a load, BEXTR might still be better than AH.
+  if (Shift == 8 && MaskSize == 8)
+    return SDValue();
+
+  // Make sure we are only using bits that were in the original value, not
+  // shifted in.
+  if (Shift + MaskSize > NVT.getSizeInBits())
+    return SDValue();
+
+  // Create a BEXTR node.
+  SDValue C = DAG.getConstant(Shift | (MaskSize << 8), dl, NVT);
+  SDValue New = DAG.getNode(X86ISD::BEXTR, dl, NVT, N0->getOperand(0), C);
+  return New;
+}
+
 // Look for (and (ctpop X), 1) which is the IR form of __builtin_parity.
 // Turn it into series of XORs and a setnp.
 static SDValue combineParity(SDNode *N, SelectionDAG &DAG,
@@ -35379,6 +35442,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

+  if (SDValue R = combineAndIntoBEXTR(N, DAG, Subtarget))
+    return R;
+
  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll
index 6865cc5a0ef..2b335ea4268 100644
--- a/llvm/test/CodeGen/X86/tbm_patterns.ll
+++ b/llvm/test/CodeGen/X86/tbm_patterns.ll
@@ -53,8 +53,7 @@ define i32 @test_x86_tbm_bextri_u32_z2(i32 %a, i32 %b, i32 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl %esi, %eax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovnel %edx, %eax
 ; CHECK-NEXT:    retq
  %t0 = lshr i32 %a, 4
@@ -114,8 +113,7 @@ define i64 @test_x86_tbm_bextri_u64_z2(i64 %a, i64 %b, i64 %c) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movq %rsi, %rax
-; CHECK-NEXT:    shrl $4, %edi
-; CHECK-NEXT:    testl $4095, %edi # imm = 0xFFF
+; CHECK-NEXT:    bextrl $3076, %edi, %ecx # imm = 0xC04
 ; CHECK-NEXT:    cmovneq %rdx, %rax
 ; CHECK-NEXT:    retq
  %t0 = lshr i64 %a, 4
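Both the removed isel matcher and the new DAG combine build the BEXTR control word the same way: the start bit goes in bits 7:0 and the field width in bits 15:8, i.e. Shift | (MaskSize << 8). As a worked check against the updated tests: (a >> 4) & 0xFFF gives Shift = 4 and MaskSize = 12, so the control is 4 | (12 << 8) = 3076 = 0xC04, the exact immediate in the new bextrl $3076 lines. The self-contained sketch below is not part of the patch; bextrControl and bextrRef are hypothetical names used only to illustrate the encoding and BEXTR's extract semantics.

#include <cassert>
#include <cstdint>

// Pack a BEXTR control word: start bit in bits 7:0, length in bits 15:8.
// This mirrors the Shift | (MaskSize << 8) expression in the patch.
static uint64_t bextrControl(uint64_t Shift, uint64_t MaskSize) {
  return Shift | (MaskSize << 8);
}

// Reference semantics of BEXTR: extract Len bits of Src starting at Start,
// zero-filling everything above the extracted field.
static uint64_t bextrRef(uint64_t Src, uint64_t Control) {
  uint64_t Start = Control & 0xFF;
  uint64_t Len = (Control >> 8) & 0xFF;
  if (Start >= 64 || Len == 0)
    return 0;
  uint64_t Shifted = Src >> Start;
  return Len >= 64 ? Shifted : Shifted & ((1ULL << Len) - 1);
}

int main() {
  // (a >> 4) & 0xFFF from the tests: Shift = 4, MaskSize = 12.
  assert(bextrControl(4, 12) == 0xC04); // the bextrl $3076 immediate
  uint64_t A = 0x12345678;
  assert(bextrRef(A, bextrControl(4, 12)) == ((A >> 4) & 0xFFF));
  return 0;
}

Because BEXTR zero-fills above the extracted field, the Shift + MaskSize > NVT.getSizeInBits() bailout matters chiefly for the SRA case, where the shifted-in sign bits would differ from the zeros BEXTR produces.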

