| author | Eli Friedman <efriedma@codeaurora.org> | 2018-08-10 21:21:53 +0000 |
|---|---|---|
| committer | Eli Friedman <efriedma@codeaurora.org> | 2018-08-10 21:21:53 +0000 |
| commit | e1687a89e885c89b3a1f75c9ab35d5bb74a62869 | |
| tree | a91891b66373080d51e1374a1cdb031721482f44 /llvm/lib/Target | |
| parent | 29ec67b62f23e9dc357439809a01d6156e769eda | |
[ARM] Adjust AND immediates to make them cheaper to select.
LLVM normally prefers to minimize the number of bits set in an AND
immediate, but that doesn't always match the available ARM instructions.
In Thumb1 mode, prefer uxtb or uxth where possible; otherwise, prefer
a two-instruction sequence movs+ands or movs+bics.
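Concretely, an AND immediate may be replaced by any mask that agrees with the original on the demanded bits, i.e. any mask between Mask & Demanded and Mask | ~Demanded. A minimal standalone sketch of that equivalence check (hypothetical names, not the LLVM code):

```cpp
// Standalone sketch: a replacement mask M computes the same result on the
// demanded bits iff it keeps every bit of (Mask & Demanded) set and sets no
// bit outside (Mask | ~Demanded). That freedom is what lets the target pick
// 0xFF (uxtb), 0xFFFF (uxth), or a movs-friendly immediate instead of the
// minimal mask.
#include <cstdint>
#include <cstdio>

bool isEquivalentMask(uint32_t Mask, uint32_t Demanded, uint32_t M) {
  uint32_t Shrunk = Mask & Demanded;    // bits that must stay set
  uint32_t Expanded = Mask | ~Demanded; // bits that may be set
  return (Shrunk & M) == Shrunk && (~Expanded & M) == 0;
}

int main() {
  // (x & 0x1FF) where only the low 8 bits are demanded: 0xFF is a legal
  // replacement, so the AND can be selected as a single uxtb.
  std::printf("%d\n", isEquivalentMask(0x1FF, 0xFF, 0xFF)); // prints 1
}
```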
Some potential improvements are outlined in
ARMTargetLowering::targetShrinkDemandedConstant, but it seems to work
pretty well already.
The ARMISelDAGToDAG fix ensures we don't generate an invalid UBFX
instruction due to a larger-than-expected mask. (It's orthogonal, in
some sense, but as far as I can tell it's either impossible or nearly
impossible to reproduce the bug without this change.)
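To illustrate the failure mode the fix guards against: after a logical right shift by Srl_imm, only the low 32 - Srl_imm bits of the result can be set, so AND-immediate bits above that range must be cleared before the UBFX width is derived from the mask. A minimal sketch, assuming C++20's std::countr_one as a stand-in for LLVM's countTrailingOnes:

```cpp
// Standalone sketch of the invariant the ARMISelDAGToDAG change restores:
// bits of the AND immediate above (32 - Srl_imm) are always zero after the
// shift, and leaving them set would overcount the UBFX field width.
#include <bit>
#include <cassert>
#include <cstdint>

void checkUbfxOperands(uint32_t And_imm, unsigned Srl_imm) {
  And_imm &= ~0u >> Srl_imm;                  // the fix: trim unreachable bits
  unsigned Width = std::countr_one(And_imm);  // extracted field width in bits
  unsigned LSB = Srl_imm;
  assert(Width > 0 && LSB + Width <= 32 && "UBFX operands out of range");
}

int main() {
  // Without the &= above, And_imm = 0xFFFFFFFF with Srl_imm = 8 would give
  // Width = 32 and LSB + Width = 40, which UBFX cannot encode.
  checkUbfxOperands(0xFFFFFFFF, 8);
}
```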
According to my testing, this seems to give a small but consistent
code-size improvement, since bic is formed more often for ISD::AND with an
immediate.
Differential Revision: https://reviews.llvm.org/D50030
llvm-svn: 339472
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 5 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 77 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 3 |

3 files changed, 85 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 81196555535..665e67beddb 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2309,6 +2309,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
                                        Srl_imm)) {
         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
 
+        // Mask off the unnecessary bits of the AND immediate; normally
+        // DAGCombine will do this, but that might not happen if
+        // targetShrinkDemandedConstant chooses a different immediate.
+        And_imm &= -1U >> Srl_imm;
+
         // Note: The width operand is encoded as width-1.
         unsigned Width = countTrailingOnes(And_imm) - 1;
         unsigned LSB = Srl_imm;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index da99058b91a..f1801c07a23 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13602,6 +13602,83 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   }
 }
 
+bool
+ARMTargetLowering::targetShrinkDemandedConstant(SDValue Op,
+                                                const APInt &DemandedAPInt,
+                                                TargetLoweringOpt &TLO) const {
+  // Delay optimization, so we don't have to deal with illegal types, or block
+  // optimizations.
+  if (!TLO.LegalOps)
+    return false;
+
+  // Only optimize AND for now.
+  if (Op.getOpcode() != ISD::AND)
+    return false;
+
+  EVT VT = Op.getValueType();
+
+  // Ignore vectors.
+  if (VT.isVector())
+    return false;
+
+  assert(VT == MVT::i32 && "Unexpected integer type");
+
+  // Make sure the RHS really is a constant.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+  if (!C)
+    return false;
+
+  unsigned Mask = C->getZExtValue();
+
+  // If mask is zero, nothing to do.
+  if (!Mask)
+    return false;
+
+  unsigned Demanded = DemandedAPInt.getZExtValue();
+  unsigned ShrunkMask = Mask & Demanded;
+  unsigned ExpandedMask = Mask | ~Demanded;
+
+  auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
+    return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
+  };
+  auto UseMask = [this, Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
+    if (NewMask == Mask)
+      return true;
+    SDLoc DL(Op);
+    SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
+    SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
+    return TLO.CombineTo(Op, NewOp);
+  };
+
+  // Prefer uxtb mask.
+  if (IsLegalMask(0xFF))
+    return UseMask(0xFF);
+
+  // Prefer uxth mask.
+  if (IsLegalMask(0xFFFF))
+    return UseMask(0xFFFF);
+
+  // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if (ShrunkMask < 256)
+    return UseMask(ShrunkMask);
+
+  // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
+  // FIXME: Prefer a contiguous sequence of bits for other optimizations.
+  if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
+    return UseMask(ExpandedMask);
+
+  // Potential improvements:
+  //
+  // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
+  // We could try to prefer Thumb1 immediates which can be lowered to a
+  // two-instruction sequence.
+  // We could try to recognize more legal ARM/Thumb2 immediates here.
+
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 //                           ARM Inline Assembly Support
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 50b4c2977fb..47b20aa4a6a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -389,6 +389,9 @@ class VectorType;
                                        const SelectionDAG &DAG,
                                        unsigned Depth) const override;
 
+    bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
+                                      TargetLoweringOpt &TLO) const override;
+
     bool ExpandInlineAsm(CallInst *CI) const override;
```
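As a usage illustration, the selection order above can be re-stated on plain integers; an illustrative standalone sketch (hypothetical pickAndImmediate helper, not the LLVM code) showing which immediate a few sample inputs end up with:

```cpp
#include <cstdint>
#include <cstdio>

// Mirrors the preference order of targetShrinkDemandedConstant on plain
// integers: uxtb, then uxth, then a small positive mask, then a bic-style
// mask, otherwise keep the original immediate.
uint32_t pickAndImmediate(uint32_t Mask, uint32_t Demanded) {
  uint32_t Shrunk = Mask & Demanded;    // minimal equivalent mask
  uint32_t Expanded = Mask | ~Demanded; // maximal equivalent mask
  auto Legal = [&](uint32_t M) {
    return (Shrunk & M) == Shrunk && (~Expanded & M) == 0;
  };
  if (Legal(0xFF))
    return 0xFF;                        // selects as uxtb
  if (Legal(0xFFFF))
    return 0xFFFF;                      // selects as uxth
  if (Shrunk < 256)
    return Shrunk;                      // Thumb1 movs+ands; ARM/Thumb2 and
  if ((int32_t)Expanded <= -2 && (int32_t)Expanded >= -256)
    return Expanded;                    // Thumb1 movs+bics; ARM/Thumb2 bic
  return Mask;                          // no improvement found
}

int main() {
  std::printf("%#x\n", (unsigned)pickAndImmediate(0x1FF, 0xFF));     // 0xff
  std::printf("%#x\n", (unsigned)pickAndImmediate(0x1FFFF, 0xFFFF)); // 0xffff
  std::printf("%#x\n", (unsigned)pickAndImmediate(0xFFFFFF00, ~0u)); // bic #255
}
```

The last case shows the code-size win mentioned above: 0xFFFFFF00 is -256, so the AND survives as a single bic with immediate 255 rather than a movw/movt pair plus an and.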

