[X86] Disable shouldFoldConstantShiftPairToMask for scalar shifts on AMD targets (PR40758)

D61068 handled vector shifts; this patch does the same for scalars where there are a similar number of pipes for shifts as for bit ops - this is true almost entirely for AMD targets, where the scalar ALUs are well balanced. This combine avoids the AND immediate mask, which usually means we reduce encoding size. Some tests show use of (slow, scaled) LEA instead of SHL in some cases, but that's due to particular shift immediates - shift+mask generate these just as easily. Differential Revision: https://reviews.llvm.org/D61830 llvm-svn: 360684
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-05-14 15:21:28 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-05-14 15:21:28 +0000
commit: c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a (patch)
tree: db93cdd04e60662d9bb4b0f212d8f55d3b6b2ffa /llvm/lib/Target
parent: 3b917019a5d6379eec026fb1fbabd4bd9e2bbead (diff)
download: bcm5719-llvm-c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a.tar.gz
bcm5719-llvm-c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a.zip
3 files changed, 22 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index a799c1fda49..8f6d201bbb4 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -427,6 +427,11 @@ def FeatureFastHorizontalOps
         "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
         "normal vector instructions with shuffles", [FeatureSSE3]>;
 
+def FeatureFastScalarShiftMasks
+    : SubtargetFeature<
+        "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true",
+        "Prefer a left/right scalar logical shift pair over a shift+and pair">;
+
 def FeatureFastVectorShiftMasks
     : SubtargetFeature<
         "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
@@ -784,6 +789,7 @@ def ProcessorFeatures {
                                                       FeatureSlowSHLD,
                                                       FeatureLAHFSAHF,
                                                       FeatureFast15ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
                                                       FeatureFastVectorShiftMasks];
   list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
 
@@ -825,6 +831,7 @@ def ProcessorFeatures {
                                                       FeatureSlowSHLD,
                                                       FeatureLAHFSAHF,
                                                       FeatureFast11ByteNOP,
+                                                      FeatureFastScalarShiftMasks,
                                                       FeatureBranchFusion];
   list<SubtargetFeature> BdVer1Features = BdVer1InheritableFeatures;
 
@@ -876,6 +883,7 @@ def ProcessorFeatures {
                                        FeatureFastBEXTR,
                                        FeatureFast15ByteNOP,
                                        FeatureBranchFusion,
+                                       FeatureFastScalarShiftMasks,
                                        FeatureMMX,
                                        FeatureMOVBE,
                                        FeatureMWAITX,
@@ -1092,20 +1100,22 @@ foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
 foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
                  FeatureSSE2, Feature3DNowA, FeatureFXSR, FeatureNOPL,
-                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV]>;
+                 Feature64Bit, FeatureSlowSHLD, FeatureCMOV,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
   def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B, FeatureSSE3,
                  Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B,
-                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit]>;
+                 FeatureSlowSHLD, FeatureCMOV, Feature64Bit,
+                 FeatureFastScalarShiftMasks]>;
 }
 
 foreach P = ["amdfam10", "barcelona"] in {
   def : Proc<P, [FeatureX87, FeatureCMPXCHG8B, FeatureSSE4A, Feature3DNowA,
                  FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT,
                  FeaturePOPCNT, FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV,
-                 Feature64Bit]>;
+                 Feature64Bit, FeatureFastScalarShiftMasks]>;
 }
 
 // Bobcat
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 28bd08f57c1..43911a1b016 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5021,11 +5021,12 @@ bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
           (N->getOpcode() == ISD::SRL &&
            N->getOperand(0).getOpcode() == ISD::SHL)) &&
          "Expected shift-shift mask");
-
-  if (Subtarget.hasFastVectorShiftMasks() && N->getValueType(0).isVector()) {
+  EVT VT = N->getValueType(0);
+  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
+      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
     // Only fold if the shift values are equal - so it folds to AND.
-    // TODO - we should fold if either is non-uniform but we don't do the
-    // fold for non-splats yet.
+    // TODO - we should fold if either is a non-uniform vector but we don't do
+    // the fold for non-splats yet.
     return N->getOperand(1) == N->getOperand(0).getOperand(1);
   }
   return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 3b11bb12f62..43d4ab71318 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -396,6 +396,9 @@ protected:
   /// Try harder to combine to horizontal vector ops if they are fast.
   bool HasFastHorizontalOps = false;
 
+  /// Prefer a left/right scalar logical shifts pair over a shift+and pair.
+  bool HasFastScalarShiftMasks = false;
+
   /// Prefer a left/right vector logical shifts pair over a shift+and pair.
   bool HasFastVectorShiftMasks = false;
 
@@ -650,6 +653,7 @@ public:
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
   bool hasFastBEXTR() const { return HasFastBEXTR; }
   bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+  bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
   bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
   bool hasMacroFusion() const { return HasMacroFusion; }
   bool hasBranchFusion() const { return HasBranchFusion; }
author	Simon Pilgrim <llvm-dev@redking.me.uk>	2019-05-14 15:21:28 +0000
committer	Simon Pilgrim <llvm-dev@redking.me.uk>	2019-05-14 15:21:28 +0000
commit	c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a (patch)
tree	db93cdd04e60662d9bb4b0f212d8f55d3b6b2ffa /llvm/lib/Target
parent	3b917019a5d6379eec026fb1fbabd4bd9e2bbead (diff)
download	bcm5719-llvm-c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a.tar.gz bcm5719-llvm-c2d9cfd9250d93ec4e1785a8eabbf40eaa245b6a.zip