summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 87
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 3
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5
4 files changed, 94 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 1bc6be45056..2cdd691fc10 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -106,12 +106,13 @@ private:
MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;
- SDNode *glueCopyToM0(SDNode *N) const;
+ SDNode *glueCopyToM0LDSInit(SDNode *N) const;
+ SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+ bool isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const;
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
@@ -209,6 +210,7 @@ private:
void SelectBRCOND(SDNode *N);
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
+ void SelectINTRINSIC_W_CHAIN(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -339,29 +341,32 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
}
}
-SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
- if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
- !Subtarget->ldsRequiresM0Init())
- return N;
-
+/// Copy \p Val into M0 through SITargetLowering::copyToM0 (chained to the
+/// DAG entry node) and append the copy's glue result as an extra trailing
+/// operand of \p N. Returns the node produced by morphing \p N with the
+/// extended operand list; opcode and value types are left unchanged.
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const {
const SITargetLowering& Lowering =
- *static_cast<const SITargetLowering*>(getTargetLowering());
+ *static_cast<const SITargetLowering*>(getTargetLowering());
-// Write max value to m0 before each load operation
+// Write Val to m0; the resulting glue is attached to N below.
SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
- CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+ Val);
SDValue Glue = M0.getValue(1);
SmallVector <SDValue, 8> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- Ops.push_back(N->getOperand(i));
- }
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ Ops.push_back(N->getOperand(i));
+
Ops.push_back(Glue);
return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
+/// Glue an M0 write of the constant -1 to memory node \p N, but only when
+/// \p N accesses LOCAL_ADDRESS and the subtarget reports that LDS requires
+/// M0 to be initialized. In every other case \p N is returned untouched.
+/// \p N is cast to MemSDNode unconditionally, so callers must only pass
+/// memory nodes.
+SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const {
+ if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS ||
+ !Subtarget->ldsRequiresM0Init())
+ return N;
+ return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));
+}
+
MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm,
EVT VT) const {
SDNode *Lo = CurDAG->getMachineNode(
@@ -472,7 +477,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
Opc == ISD::ATOMIC_LOAD_FADD ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
switch (Opc) {
default:
@@ -570,7 +575,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::STORE:
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: {
- N = glueCopyToM0(N);
+ N = glueCopyToM0LDSInit(N);
break;
}
@@ -648,6 +653,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectCode(N);
return;
}
+
+ break;
+ }
+ case ISD::INTRINSIC_W_CHAIN: {
+ SelectINTRINSIC_W_CHAIN(N);
+ return;
}
}
@@ -828,7 +839,7 @@ void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
-bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
+bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset,
unsigned OffsetBits) const {
if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
(OffsetBits == 8 && !isUInt<8>(Offset)))
@@ -1760,6 +1771,52 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
CurDAG->RemoveDeadNode(N);
}
+/// Custom selection for INTRINSIC_W_CHAIN nodes.
+///
+/// Only amdgcn_ds_append / amdgcn_ds_consume with an i32 result are handled
+/// here; they are selected to DS_APPEND / DS_CONSUME with the pointer (or
+/// its base, when a constant offset can be folded) copied into M0. Any other
+/// intrinsic is passed on to SelectCode().
+void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
+ // Operand layout read below: 0 = chain, 1 = intrinsic ID, 2 = pointer.
+ unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ if ((IntrID != Intrinsic::amdgcn_ds_append &&
+ IntrID != Intrinsic::amdgcn_ds_consume) ||
+ N->getValueType(0) != MVT::i32) {
+ SelectCode(N);
+ return;
+ }
+
+ // The address is assumed to be uniform, so if it ends up in a VGPR, it will
+ // be copied to an SGPR with readfirstlane.
+ unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
+ AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;
+
+ // Chain and Ptr are captured up front, before glueCopyToM0 morphs N.
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(2);
+ MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
+ // The gds operand is set when the access targets the region address space.
+ bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;
+
+ // Prefer splitting the pointer into base + constant offset: when the offset
+ // passes the 16-bit isDSOffsetLegal check, only the base goes into M0 and
+ // the constant becomes the instruction's offset operand.
+ SDValue Offset;
+ if (CurDAG->isBaseWithConstantOffset(Ptr)) {
+ SDValue PtrBase = Ptr.getOperand(0);
+ SDValue PtrOffset = Ptr.getOperand(1);
+
+ const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+ if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue(), 16)) {
+ N = glueCopyToM0(N, PtrBase);
+ Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
+ }
+ }
+
+ // Nothing was folded (Offset is still null): put the whole pointer in M0
+ // and use a zero offset.
+ if (!Offset) {
+ N = glueCopyToM0(N, Ptr);
+ Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
+ }
+
+ // Operands passed to the selected node: offset, gds flag, chain, and the
+ // glue that glueCopyToM0 appended as N's last operand.
+ SDValue Ops[] = {
+ Offset,
+ CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
+ Chain,
+ N->getOperand(N->getNumOperands() - 1) // New glue
+ };
+
+ CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const {
Mods = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index e973bb9b0a2..aaad8b6e48a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -109,7 +109,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
// modes on SI to know the high bits are 0 so pointer adds don't wrap. We
// can't represent this with range metadata because it's only allowed for
// integer types.
- if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ if ((PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
ST.getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS)
continue;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 0c1fca88235..b57b2d2fd20 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -926,7 +926,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+ case Intrinsic::amdgcn_ds_append:
+ case Intrinsic::amdgcn_ds_consume: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getType());
+ Info.ptrVal = CI.getOperand(0);
+ Info.align = 0;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ const ConstantInt *Vol = dyn_cast<ConstantInt>(CI.getOperand(1));
+ if (!Vol || !Vol->isZero())
+ Info.flags |= MachineMemOperand::MOVolatile;
+
+ return true;
+ }
default:
return false;
}
@@ -1978,7 +1991,8 @@ SDValue SITargetLowering::LowerFormalArguments(
auto *ParamTy =
dyn_cast<PointerType>(FType->getParamType(Ins[i].getOrigArgIndex()));
if (Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS &&
- ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+ ParamTy && (ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
+ ParamTy->getAddressSpace() == AMDGPUAS::REGION_ADDRESS)) {
// On SI local pointers are just offsets into LDS, so they are always
// less than 16-bits. On CI and newer they could potentially be
// real pointers, so we can't guarantee their size.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 88bab949a81..45687d4486e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -275,6 +275,11 @@ bool SIInstrInfo::getMemOperandWithOffset(MachineInstr &LdSt,
if (OffsetImm) {
// Normal, single offset LDS instruction.
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ // TODO: ds_consume/ds_append use M0 for the base address. Is it safe to
+ // report that here?
+ if (!BaseOp)
+ return false;
+
Offset = OffsetImm->getImm();
assert(BaseOp->isReg() && "getMemOperandWithOffset only supports base "
"operands of type register.");
OpenPOWER on IntegriCloud