summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-01-18 22:01:13 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-01-18 22:01:13 +0000
commit6e3a45193aa927de0d4a677594f74c31cfc26727 (patch)
tree9aa2723f6e9c9845519753e867047853014630b6 /llvm/lib/Target
parent77d86d1c08132e299f785515cb18a0e8cd724c9d (diff)
downloadbcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.tar.gz
bcm5719-llvm-6e3a45193aa927de0d4a677594f74c31cfc26727.zip
AMDGPU: Split 64-bit and of constant up
This breaks the tests that were meant for testing 64-bit inline immediates, so move those to shl where they won't be broken up. This should be repeated for the other related bit ops. llvm-svn: 258095
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp61
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h8
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp3
3 files changed, 70 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 69f4c7cf359..477851d4d3a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -28,7 +28,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DataLayout.h"
-
+#include "SIInstrInfo.h"
using namespace llvm;
static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
@@ -376,6 +376,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
+ setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
@@ -1177,6 +1178,21 @@ SDValue AMDGPUTargetLowering::CombineFMinMaxLegacy(SDLoc DL,
return SDValue();
}
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+
+ SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
+
+ return std::make_pair(Lo, Hi);
+}
+
SDValue AMDGPUTargetLowering::ScalarizeVectorLoad(const SDValue Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -2539,6 +2555,43 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
+// TODO: Should repeat for other bit ops.
+SDValue AMDGPUTargetLowering::performAndCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (N->getValueType(0) != MVT::i64)
+ return SDValue();
+
+ // Break up 64-bit and of a constant into two 32-bit ands. This will typically
+ // happen anyway for a VALU 64-bit and. This exposes other 32-bit integer
+ // combine opportunities since most 64-bit operations are decomposed this way.
+ // TODO: We won't want this for SALU especially if it is an inline immediate.
+ const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!RHS)
+ return SDValue();
+
+ uint64_t Val = RHS->getZExtValue();
+ if (Lo_32(Val) != 0 && Hi_32(Val) != 0 && !RHS->hasOneUse()) {
+ // If either half of the constant is 0, this is really a 32-bit and, so
+ // split it. If we can re-use the full materialized constant, keep it.
+ return SDValue();
+ }
+
+ SDLoc SL(N);
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = split64BitValue(N->getOperand(0), DAG);
+
+ SDValue LoRHS = DAG.getConstant(Lo_32(Val), SL, MVT::i32);
+ SDValue HiRHS = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+
+ SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, LoRHS);
+ SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, HiRHS);
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, LoAnd, HiAnd);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+}
+
SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (N->getValueType(0) != MVT::i64)
@@ -2751,6 +2804,12 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSrlCombine(N, DCI);
}
+ case ISD::AND: {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ break;
+
+ return performAndCombine(N, DCI);
+ }
case ISD::MUL:
return performMulCombine(N, DCI);
case AMDGPUISD::MUL_I24:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 920df8f5a6a..fa52c66f5a7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -67,21 +67,27 @@ private:
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+protected:
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
-protected:
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT);
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
+ /// Return 64-bit value Op as two 32-bit integers.
+ std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
+ SelectionDAG &DAG) const;
+
/// \brief Split a vector load into a scalar load of each component.
SDValue ScalarizeVectorLoad(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 544867513d9..2c22cee4bf4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1915,6 +1915,9 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
if (DCI.isBeforeLegalize())
return SDValue();
+ if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI))
+ return Base;
+
SelectionDAG &DAG = DCI.DAG;
// (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
OpenPOWER on IntegriCloud