author     Matt Arsenault <Matthew.Arsenault@amd.com>  2019-01-28 20:14:49 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>  2019-01-28 20:14:49 +0000
commit     cdd191d9db6a17b75b5f6d6f3d1d2691ac198153 (patch)
tree       3ade69542218a8b35a4658c843389b8a1fd01274 /llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
parent     e4e9ba2bea60c73a19d8a673e947f0d9b258a777 (diff)
AMDGPU: Add DS append/consume intrinsics
Since these pass the pointer in m0 unlike other DS instructions, these need to worry about whether the address is uniform or not. This assumes the address is dynamically uniform, and just uses readfirstlane to get a copy into an SGPR.

I don't know if these have the same 16-bit add for the addressing mode offset problem on SI or not, but I've just assumed they do.

Also includes some misc. changes to avoid test differences between the LDS and GDS versions.

llvm-svn: 352422
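A minimal IR-level usage sketch of the new intrinsics (not part of this patch; the .p3i32 name mangling, the function name, and the trailing i1 volatile flag are assumptions for illustration):

  declare i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* nocapture, i1)
  declare i32 @llvm.amdgcn.ds.consume.p3i32(i32 addrspace(3)* nocapture, i1)

  define amdgpu_kernel void @append_example(i32 addrspace(3)* %lds, i32 addrspace(1)* %out) {
    ; %lds is assumed dynamically uniform; selection moves it into m0,
    ; inserting a readfirstlane copy if the pointer lives in a VGPR.
    %v = call i32 @llvm.amdgcn.ds.append.p3i32(i32 addrspace(3)* %lds, i1 false)
    store i32 %v, i32 addrspace(1)* %out
    ret void
  }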
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp  87
1 file changed, 72 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 1bc6be45056..2cdd691fc10 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -106,12 +106,13 @@ private:
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
- SDNode *glueCopyToM0(SDNode *N) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
@@ -209,6 +210,7 @@ private:
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -339,29 +341,32 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
- !Subtarget->ldsRequiresM0Init())
- return N;
-
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
+ *static_cast<const SITargetLowering*>(getTargetLowering());
// Write max value to m0 before each load operation
SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
- CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ Val);
SDValue Glue = M0.getValue(1);
SmallVector <SDValue, 8> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
Ops.push_back(Glue);
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
+ !Subtarget->ldsRequiresM0Init())
+ return N;
+ return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+}
+
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
EVT VT) const {
SDNode *Lo = CurDAG->getMachineNode(
@@ -472,7 +477,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
switch (Opc) {
default:
@@ -570,7 +575,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: {
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
break;
}
@@ -648,6 +653,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
return;
}
+
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ SelectINTRINSIC_W_CHAIN(N);
+ return;
}
}
@@ -828,7 +839,7 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
@@ -1760,6 +1771,52 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if ((IntrID != Intrinsic::amdgcn_ds_append &&
+ IntrID != Intrinsic::amdgcn_ds_consume) ||
+ N->getValueType(0) != MVT::i32) {
+ SelectCode(N);
+ return;
+ }
+
+ // The address is assumed to be uniform, so if it ends up in a VGPR, it will
+ // be copied to an SGPR with readfirstlane.
+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
+ AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(2);
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ SDValue Offset;
+ if (CurDAG->isBaseWithConstantOffset(Ptr)) {
+ SDValue PtrBase = Ptr.getOperand(0);
+ SDValue PtrOffset = Ptr.getOperand(1);
+
+ const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+ if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+ N = glueCopyToM0(N, PtrBase);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
+ }
+ }
+
+ if (!Offset) {
+ N = glueCopyToM0(N, Ptr);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
+ }
+
+ SDValue Ops[] = {
+ Offset,
+ CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
+ Chain,
+ N->getOperand(N->getNumOperands() - 1) // New glue
+ };
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const {
Mods = 0;