summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 37
1 files changed, 21 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4519b987b55..cc326cdce88 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1068,15 +1068,12 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDValue SITargetLowering::lowerKernargMemParameter(
SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
- uint64_t Offset, bool Signed,
+ uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
- const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- unsigned Align = DL.getABITypeAlignment(Ty);
-
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
MachineMemOperand::MODereferenceable |
@@ -1663,7 +1660,15 @@ SDValue SITargetLowering::LowerFormalArguments(
SmallVector<SDValue, 16> Chains;
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ // FIXME: This is the minimum kernel argument alignment. We should improve
+ // this to the maximum alignment of the arguments.
+ //
+ // FIXME: Alignment of explicit arguments totally broken with non-0 explicit
+ // kern arg offset.
+ const unsigned KernelArgBaseAlign = 16;
+ const unsigned ExplicitOffset = Subtarget->getExplicitKernelArgOffset(Fn);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Skipped[i]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -1677,14 +1682,14 @@ SDValue SITargetLowering::LowerFormalArguments(
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(Fn) +
- VA.getLocMemOffset();
+ const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset();
Info->setABIArgOffset(Offset + MemVT.getStoreSize());
+ unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
// The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
+ // thread group and global sizes for clover.
SDValue Arg = lowerKernargMemParameter(
- DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]);
+ DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
@@ -4303,7 +4308,7 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
unsigned Offset) const {
SDLoc SL(Op);
SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, false);
+ DAG.getEntryNode(), Offset, 4, false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
@@ -4404,37 +4409,37 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
+ SI::KernelInputOffsets::NGROUPS_X, 4, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
+ SI::KernelInputOffsets::NGROUPS_Y, 4, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
+ SI::KernelInputOffsets::NGROUPS_Z, 4, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, 4, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, 4, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, 4, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
OpenPOWER on IntegriCloud