summary refs log tree commit diff stats
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 209
1 files changed, 174 insertions, 35 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 2cdd691fc10..c62420ec032 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -51,6 +51,8 @@
#include <new>
#include <vector>
+#define DEBUG_TYPE "isel"
+
using namespace llvm;
namespace llvm {
@@ -88,7 +90,10 @@ public:
SelectionDAGISel::getAnalysisUsage(AU);
}
+ bool matchLoadD16FromBuildVector(SDNode *N) const;
+
bool runOnMachineFunction(MachineFunction &MF) override;
+ void PreprocessISelDAG() override;
void Select(SDNode *N) override;
StringRef getPassName() const override;
void PostprocessISelDAG() override;
@@ -193,6 +198,7 @@ private:
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ SDValue getHi16Elt(SDValue In) const;
bool SelectHi16Elt(SDValue In, SDValue &Src) const;
void SelectADD_SUB_I64(SDNode *N);
@@ -236,11 +242,49 @@ public:
SDValue &Offset) override;
bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void PreprocessISelDAG() override {} // Empty override: nothing to preprocess.
+
protected:
// Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
+static SDValue stripBitcast(SDValue Val) { // Peel one ISD::BITCAST, if any.
+ return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
+}
+
+// Figure out if this is really an extract of the high 16-bits of a dword.
+static bool isExtractHiElt(SDValue In, SDValue &Out) { // Out set only on match.
+ In = stripBitcast(In);
+ if (In.getOpcode() != ISD::TRUNCATE)
+ return false;
+ // Accept (trunc (srl X, 16)); the width of X itself is not checked here.
+ SDValue Srl = In.getOperand(0);
+ if (Srl.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ if (ShiftAmt->getZExtValue() == 16) {
+ Out = stripBitcast(Srl.getOperand(0)); // Also peel a bitcast off X.
+ return true;
+ }
+ }
+ }
+
+ return false; // Non-constant or non-16 shift amount, or no SRL at all.
+}
+
+// Look through operations that obscure just looking at the low 16-bits of the
+// same register: a TRUNCATE of a 32-bit source, plus one bitcast beneath it.
+static SDValue stripExtractLoElt(SDValue In) {
+ if (In.getOpcode() == ISD::TRUNCATE) {
+ SDValue Src = In.getOperand(0);
+ if (Src.getValueType().getSizeInBits() == 32)
+ return stripBitcast(Src);
+ }
+
+ return In; // Not a truncate of a 32-bit value: hand In back unchanged.
+}
+
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
@@ -270,6 +314,114 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
return SelectionDAGISel::runOnMachineFunction(MF);
}
+bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const {
+ assert(Subtarget->d16PreservesUnusedBits()); // PreprocessISelDAG checks this.
+ MVT VT = N->getValueType(0).getSimpleVT();
+ if (VT != MVT::v2i16 && VT != MVT::v2f16)
+ return false; // Only v2i16 / v2f16 results are candidates.
+
+ SDValue Lo = N->getOperand(0);
+ SDValue Hi = N->getOperand(1);
+
+ LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi));
+
+ // build_vector lo, (load ptr) -> load_d16_hi ptr, lo
+ // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo
+ // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo
+
+ // Need to check for possible indirect dependencies on the other half of the
+ // vector to avoid introducing a cycle.
+ if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) {
+ SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+ // Wrap Lo in a VT-typed vector; it becomes the new node's third operand.
+ SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo);
+ SDValue Ops[] = {
+ LdHi->getChain(), LdHi->getBasePtr(), TiedIn
+ };
+ // i8 memory type: a sextload picks the _I8 opcode, anything else _U8.
+ unsigned LoadOp = AMDGPUISD::LOAD_D16_HI;
+ if (LdHi->getMemoryVT() == MVT::i8) {
+ LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ?
+ AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8;
+ } else {
+ assert(LdHi->getMemoryVT() == MVT::i16); // Any non-i8 load must be i16.
+ }
+ // The new node reuses the old load's chain, pointer, memory VT and MMO.
+ SDValue NewLoadHi =
+ CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList,
+ Ops, LdHi->getMemoryVT(),
+ LdHi->getMemOperand());
+ // Redirect users of the vector and of the old load's chain to the new node.
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1));
+ return true;
+ }
+
+ // build_vector (load ptr), hi -> load_d16_lo ptr, hi
+ // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi
+ // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi
+ LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo));
+ if (LdLo && Lo.hasOneUse()) {
+ SDValue TiedIn = getHi16Elt(Hi);
+ if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode()))
+ return false; // Hi has no usable source, or folding would form a cycle.
+
+ SDVTList VTList = CurDAG->getVTList(VT, MVT::Other);
+ unsigned LoadOp = AMDGPUISD::LOAD_D16_LO;
+ if (LdLo->getMemoryVT() == MVT::i8) {
+ LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ?
+ AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8;
+ } else {
+ assert(LdLo->getMemoryVT() == MVT::i16); // Any non-i8 load must be i16.
+ }
+ // Bitcast the value from getHi16Elt to VT; assumes it is 32 bits wide.
+ TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn);
+
+ SDValue Ops[] = {
+ LdLo->getChain(), LdLo->getBasePtr(), TiedIn
+ };
+
+ SDValue NewLoadLo =
+ CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList,
+ Ops, LdLo->getMemoryVT(),
+ LdLo->getMemOperand());
+ // Redirect users of the vector and of the old load's chain to the new node.
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo);
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1));
+ return true;
+ }
+
+ return false; // Neither half is a foldable single-use load.
+}
+
+void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
+ if (!Subtarget->d16PreservesUnusedBits())
+ return; // matchLoadD16FromBuildVector asserts this subtarget property.
+
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
+
+ bool MadeChange = false;
+ while (Position != CurDAG->allnodes_begin()) { // Walk the list back to front.
+ SDNode *N = &*--Position;
+ if (N->use_empty())
+ continue; // Skip nodes that have no uses.
+
+ switch (N->getOpcode()) {
+ case ISD::BUILD_VECTOR:
+ MadeChange |= matchLoadD16FromBuildVector(N);
+ break;
+ default:
+ break;
+ }
+ }
+ // Delete nodes left dead by the rewrites, then dump the DAG in debug output.
+ if (MadeChange) {
+ CurDAG->RemoveDeadNodes();
+ LLVM_DEBUG(dbgs() << "After PreProcess:\n";
+ CurDAG->dump(););
+ }
+}
+
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
if (TM.Options.NoNaNsFPMath)
return true;
@@ -1889,41 +2041,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
return true;
}
-static SDValue stripBitcast(SDValue Val) {
- return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
-}
-
-// Figure out if this is really an extract of the high 16-bits of a dword.
-static bool isExtractHiElt(SDValue In, SDValue &Out) {
- In = stripBitcast(In);
- if (In.getOpcode() != ISD::TRUNCATE)
- return false;
-
- SDValue Srl = In.getOperand(0);
- if (Srl.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
- if (ShiftAmt->getZExtValue() == 16) {
- Out = stripBitcast(Srl.getOperand(0));
- return true;
- }
- }
- }
-
- return false;
-}
-
-// Look through operations that obscure just looking at the low 16-bits of the
-// same register.
-static SDValue stripExtractLoElt(SDValue In) {
- if (In.getOpcode() == ISD::TRUNCATE) {
- SDValue Src = In.getOperand(0);
- if (Src.getValueType().getSizeInBits() == 32)
- return stripBitcast(Src);
- }
-
- return In;
-}
-
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
@@ -2076,6 +2193,28 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
return true;
}
+SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
+ if (In.isUndef())
+ return CurDAG->getUNDEF(MVT::i32); // Undef input -> undef i32.
+ // Integer constant: shift its value left by 16 into an i32 constant.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
+ SDLoc SL(In);
+ return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
+ }
+ // FP constant: same, using the raw bit pattern of the value.
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
+ SDLoc SL(In);
+ return CurDAG->getConstant(
+ C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
+ }
+ // Otherwise accept only (trunc (srl X, 16)) and hand back X.
+ SDValue Src;
+ if (isExtractHiElt(In, Src))
+ return Src;
+
+ return SDValue(); // No match: empty SDValue, which callers test with '!'.
+}
+
// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
if (In.isUndef()) {
OpenPOWER on IntegriCloud