| author | David Green <david.green@arm.com> | 2019-07-24 16:42:09 +0000 |
|---|---|---|
| committer | David Green <david.green@arm.com> | 2019-07-24 16:42:09 +0000 |
| commit | bab4d8ac5a613d53962d09f6652f7791494bffa0 | |
| tree | 7e18cd9dc036a81ff07d7a05ff868009f9228aa5 /llvm/lib | |
| parent | c5cc9efa075b6fcd8cfe16d59764dcbebc949b8c | |
[ARM] Better ORs for MVE compares
This adds a De Morgan combine for ORs of compares, turning them into ANDs to
help prevent the predicates from moving into and out of general-purpose
registers. It also fills in the VCLE and VCLT nodes that MVE can select,
allowing more compares to be inverted.
Differential Revision: https://reviews.llvm.org/D65059
llvm-svn: 366920
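
The combine rests on the standard De Morgan identity on predicate masks: or(A, B) equals xor(and(not A, not B), all-ones), and each "not" is folded away by swapping the compare for its exact opposite (VCEQ ↔ VCNE, VCGE ↔ VCLT, VCGT ↔ VCLE). A minimal sketch of that identity on single mask lanes; the helper names and lane values below are illustrative, not part of the patch:

```cpp
#include <cassert>
#include <cstdint>

// One lane of an MVE predicate: all-ones (true) or all-zeros (false).
using Mask = std::uint8_t;

static Mask vcge(int a, int b) { return a >= b ? 0xFF : 0x00; }
static Mask vclt(int a, int b) { return a <  b ? 0xFF : 0x00; } // opposite of vcge
static Mask vcgt(int a, int b) { return a >  b ? 0xFF : 0x00; }
static Mask vcle(int a, int b) { return a <= b ? 0xFF : 0x00; } // opposite of vcgt

int main() {
  for (int a = -2; a <= 2; ++a)
    for (int b = -2; b <= 2; ++b)
      for (int c = -2; c <= 2; ++c)
        for (int d = -2; d <= 2; ++d) {
          // or(VCGE(a,b), VCGT(c,d)) ...
          Mask orForm = vcge(a, b) | vcgt(c, d);
          // ... equals xor(and(VCLT(a,b), VCLE(c,d)), all-ones),
          // which is the form the combine builds.
          Mask andForm = (vclt(a, b) & vcle(c, d)) ^ 0xFF;
          assert(orForm == andForm);
        }
  return 0;
}
```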
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 57 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.h | 2 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrInfo.td | 2 |
| -rw-r--r-- | llvm/lib/Target/ARM/ARMInstrMVE.td | 20 |
4 files changed, 73 insertions, 8 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d68021aeff6..276791165f5 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1521,10 +1521,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VCNEZ: return "ARMISD::VCNEZ";
   case ARMISD::VCGE: return "ARMISD::VCGE";
   case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+  case ARMISD::VCLE: return "ARMISD::VCLE";
   case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
   case ARMISD::VCGEU: return "ARMISD::VCGEU";
   case ARMISD::VCGT: return "ARMISD::VCGT";
   case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+  case ARMISD::VCLT: return "ARMISD::VCLT";
   case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
   case ARMISD::VCGTU: return "ARMISD::VCGTU";
   case ARMISD::VTST: return "ARMISD::VTST";
@@ -11820,6 +11822,57 @@ static SDValue PerformORCombineToBFI(SDNode *N,
   return SDValue();
 }
 
+static SDValue PerformORCombine_i1(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const ARMSubtarget *Subtarget) {
+  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
+  // together with predicates
+  struct Codes {
+    unsigned Opcode;
+    unsigned Opposite;
+  } InvertCodes[] = {
+      {ARMISD::VCEQ, ARMISD::VCNE},
+      {ARMISD::VCEQZ, ARMISD::VCNEZ},
+      {ARMISD::VCGE, ARMISD::VCLT},
+      {ARMISD::VCGEZ, ARMISD::VCLTZ},
+      {ARMISD::VCGT, ARMISD::VCLE},
+      {ARMISD::VCGTZ, ARMISD::VCLEZ},
+  };
+
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  unsigned Opposite0 = 0;
+  unsigned Opposite1 = 0;
+  for (auto Code : InvertCodes) {
+    if (N0->getOpcode() == Code.Opcode)
+      Opposite0 = Code.Opposite;
+    if (N0->getOpcode() == Code.Opposite)
+      Opposite0 = Code.Opcode;
+    if (N1->getOpcode() == Code.Opcode)
+      Opposite1 = Code.Opposite;
+    if (N1->getOpcode() == Code.Opposite)
+      Opposite1 = Code.Opcode;
+  }
+
+  if (!Opposite0 || !Opposite1)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Ops0;
+  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i)
+    Ops0.push_back(N0->getOperand(i));
+  SmallVector<SDValue, 4> Ops1;
+  for (unsigned i = 0, e = N1->getNumOperands(); i != e; ++i)
+    Ops1.push_back(N1->getOperand(i));
+
+  SDValue NewN0 = DCI.DAG.getNode(Opposite0, SDLoc(N0), VT, Ops0);
+  SDValue NewN1 = DCI.DAG.getNode(Opposite1, SDLoc(N1), VT, Ops1);
+  SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
+  return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
+                         DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+}
+
 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
 static SDValue PerformORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
@@ -11904,6 +11957,10 @@ static SDValue PerformORCombine(SDNode *N,
     }
   }
 
+  if (Subtarget->hasMVEIntegerOps() &&
+      (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+    return PerformORCombine_i1(N, DCI, Subtarget);
+
   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
   // reasonable.
   if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 0015f414e30..715a5b40f21 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -137,10 +137,12 @@ class VectorType;
       VCNEZ,        // Vector compare not equal to zero (MVE)
       VCGE,         // Vector compare greater than or equal.
       VCGEZ,        // Vector compare greater than or equal to zero.
+      VCLE,         // Vector compare less than or equal.
       VCLEZ,        // Vector compare less than or equal to zero.
       VCGEU,        // Vector compare unsigned greater than or equal.
       VCGT,         // Vector compare greater than.
       VCGTZ,        // Vector compare greater than zero.
+      VCLT,         // Vector compare less than.
       VCLTZ,        // Vector compare less than zero.
       VCGTU,        // Vector compare unsigned greater than.
       VTST,         // Vector test bits.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 8290a858f96..c6d9a4673a4 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -274,10 +274,12 @@ def ARMvcne : SDNode<"ARMISD::VCNE", SDTARMVCMP>;
 def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
 def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
 def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def ARMvcle : SDNode<"ARMISD::VCLE", SDTARMVCMP>;
 def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
 def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
 def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
 def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def ARMvclt : SDNode<"ARMISD::VCLT", SDTARMVCMP>;
 def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index b134768c9c5..a0cc4916bda 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3041,15 +3041,17 @@ multiclass unpred_vcmpf_r<SDPatternOperator opnode, int fc> {
 let Predicates = [HasMVEInt] in {
   defm MVE_VCEQZ : unpred_vcmp_z<ARMvceqz, "i", 0>;
   defm MVE_VCNEZ : unpred_vcmp_z<ARMvcnez, "i", 1>;
-  defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>;
-  defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>;
-  defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>;
   defm MVE_VCGEZ : unpred_vcmp_z<ARMvcgez, "s", 10>;
+  defm MVE_VCLTZ : unpred_vcmp_z<ARMvcltz, "s", 11>;
+  defm MVE_VCGTZ : unpred_vcmp_z<ARMvcgtz, "s", 12>;
+  defm MVE_VCLEZ : unpred_vcmp_z<ARMvclez, "s", 13>;
 
   defm MVE_VCEQ : unpred_vcmp_r<ARMvceq, "i", 0>;
   defm MVE_VCNE : unpred_vcmp_r<ARMvcne, "i", 1>;
-  defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>;
   defm MVE_VCGE : unpred_vcmp_r<ARMvcge, "s", 10>;
+  defm MVE_VCLT : unpred_vcmp_r<ARMvclt, "s", 11>;
+  defm MVE_VCGT : unpred_vcmp_r<ARMvcgt, "s", 12>;
+  defm MVE_VCLE : unpred_vcmp_r<ARMvcle, "s", 13>;
   defm MVE_VCGTU : unpred_vcmp_r<ARMvcgtu, "u", 8>;
   defm MVE_VCGEU : unpred_vcmp_r<ARMvcgeu, "u", 2>;
 }
@@ -3057,13 +3059,15 @@ let Predicates = [HasMVEInt] in {
 let Predicates = [HasMVEFloat] in {
   defm MVE_VFCEQZ : unpred_vcmpf_z<ARMvceqz, 0>;
   defm MVE_VFCNEZ : unpred_vcmpf_z<ARMvcnez, 1>;
-  defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>;
-  defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>;
-  defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>;
   defm MVE_VFCGEZ : unpred_vcmpf_z<ARMvcgez, 10>;
+  defm MVE_VFCLTZ : unpred_vcmpf_z<ARMvcltz, 11>;
+  defm MVE_VFCGTZ : unpred_vcmpf_z<ARMvcgtz, 12>;
+  defm MVE_VFCLEZ : unpred_vcmpf_z<ARMvclez, 13>;
 
-  defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>;
   defm MVE_VFCGE : unpred_vcmpf_r<ARMvcge, 10>;
+  defm MVE_VFCLT : unpred_vcmpf_r<ARMvclt, 11>;
+  defm MVE_VFCGT : unpred_vcmpf_r<ARMvcgt, 12>;
+  defm MVE_VFCLE : unpred_vcmpf_r<ARMvcle, 13>;
   defm MVE_VFCEQ : unpred_vcmpf_r<ARMvceq, 0>;
   defm MVE_VFCNE : unpred_vcmpf_r<ARMvcne, 1>;
 }
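
As a rough standalone model of the table-driven inversion that PerformORCombine_i1 performs over InvertCodes, the sketch below walks a two-column opcode table in both directions; the enum values and the invert helper are hypothetical stand-ins for the ARMISD opcodes, not LLVM API:

```cpp
#include <cstdio>

// Hypothetical stand-ins for the ARMISD compare opcodes used by the combine.
enum Opcode { NONE = 0, VCEQ, VCNE, VCGE, VCLT, VCGT, VCLE };

// Mirror of the InvertCodes table: each compare paired with its opposite.
struct Codes { Opcode Op; Opcode Opposite; };
static const Codes InvertCodes[] = {
    {VCEQ, VCNE}, {VCGE, VCLT}, {VCGT, VCLE},
};

// Look the opcode up in both columns, as the combine does for N0 and N1.
static Opcode invert(Opcode Op) {
  for (const Codes &C : InvertCodes) {
    if (Op == C.Op)
      return C.Opposite;
    if (Op == C.Opposite)
      return C.Op;
  }
  return NONE; // not an invertible compare; the combine bails out
}

int main() {
  // or(VCGE, VCGT) becomes xor(and(VCLT, VCLE), all-ones) after the combine.
  std::printf("VCGE -> %d (VCLT), VCGT -> %d (VCLE)\n",
              invert(VCGE), invert(VCGT));
  return 0;
}
```

Scanning both columns keeps the table at one entry per compare pair, and an OR operand that is not an invertible compare leaves its Opposite at zero, so PerformORCombine_i1 returns an empty SDValue and the generic OR combines still get their chance.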

