AMDGPU: Split 64-bit and of constant up

This breaks the tests that were meant for testing 64-bit inline immediates, so move those to shl where they won't be broken up. This should be repeated for the other related bit ops. llvm-svn: 258095
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-18 22:01:13 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2016-01-18 22:01:13 +0000
commit: 6e3a45193aa927de0d4a677594f74c31cfc26727 (patch)
tree: 9aa2723f6e9c9845519753e867047853014630b6 /llvm/lib/Target
parent: 77d86d1c08132e299f785515cb18a0e8cd724c9d (diff)
download: bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.tar.gz
bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.zip
3 files changed, 70 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 69f4c7cf359..477851d4d3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -28,7 +28,7 @@
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/DataLayout.h"
-
+#include "SIInstrInfo.h"
 using namespace llvm;
 
 static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
@@ -376,6 +376,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
   setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
   setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
 
+  setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRL);
   setTargetDAGCombine(ISD::MUL);
@@ -1177,6 +1178,21 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
   return SDValue();
 }
 
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc SL(Op);
+
+  SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
+
+  const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+  const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+  SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
+  SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
+
+  return std::make_pair(Lo, Hi);
+}
+
 SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
                                                   SelectionDAG &DAG) const {
   LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -2539,6 +2555,43 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
                       SN->getBasePtr(), SN->getMemOperand());
 }
 
+// TODO: Should repeat for other bit ops.
+SDValue AMDGPUTargetLowering::performAndCombine(SDNode *N,
+                                                DAGCombinerInfo &DCI) const {
+  if (N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  // Break up 64-bit and of a constant into two 32-bit ands. This will typically
+  // happen anyway for a VALU 64-bit and. This exposes other 32-bit integer
+  // combine opportunities since most 64-bit operations are decomposed this way.
+  // TODO: We won't want this for SALU especially if it is an inline immediate.
+  const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!RHS)
+    return SDValue();
+
+  uint64_t Val = RHS->getZExtValue();
+  if (Lo_32(Val) != 0 && Hi_32(Val) != 0 && !RHS->hasOneUse()) {
+    // If either half of the constant is 0, this is really a 32-bit and, so
+    // split it. If we can re-use the full materialized constant, keep it.
+    return SDValue();
+  }
+
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+
+  SDValue Lo, Hi;
+  std::tie(Lo, Hi) = split64BitValue(N->getOperand(0), DAG);
+
+  SDValue LoRHS = DAG.getConstant(Lo_32(Val), SL, MVT::i32);
+  SDValue HiRHS = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+
+  SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, LoRHS);
+  SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, HiRHS);
+
+  SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, LoAnd, HiAnd);
+  return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+}
+
 SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
   if (N->getValueType(0) != MVT::i64)
@@ -2751,6 +2804,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
 
     return performSrlCombine(N, DCI);
   }
+  case ISD::AND: {
+    if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+      break;
+
+    return performAndCombine(N, DCI);
+  }
   case ISD::MUL:
     return performMulCombine(N, DCI);
   case AMDGPUISD::MUL_I24:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 920df8f5a6a..fa52c66f5a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -67,21 +67,27 @@ private:
 
   SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
 
+protected:
   SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
                              DAGCombinerInfo &DCI) const;
   SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-protected:
   static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
   static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
 
   virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                      SelectionDAG &DAG) const;
 
+  /// Return 64-bit value Op as two 32-bit integers.
+  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
+                                              SelectionDAG &DAG) const;
+
   /// \brief Split a vector load into a scalar load of each component.
   SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 544867513d9..2c22cee4bf4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1915,6 +1915,9 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
   if (DCI.isBeforeLegalize())
     return SDValue();
 
+  if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI))
+    return Base;
+
   SelectionDAG &DAG = DCI.DAG;
 
   // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
author	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-01-18 22:01:13 +0000
committer	Matt Arsenault <Matthew.Arsenault@amd.com>	2016-01-18 22:01:13 +0000
commit	6e3a45193aa927de0d4a677594f74c31cfc26727 (patch)
tree	9aa2723f6e9c9845519753e867047853014630b6 /llvm/lib/Target
parent	77d86d1c08132e299f785515cb18a0e8cd724c9d (diff)
download	bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.tar.gz bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.zip