summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-30 16:17:51 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-30 16:17:51 +0000
commit7b4826e6cec09e308dda43f6c4d1dc25d0905b7a (patch)
tree9067dff092c8b2892e529d71f88f2454d064f836 /llvm/lib/Target/AMDGPU
parentebaaa2ddae28edd5a29eeec3a7eb927da11f4fb6 (diff)
downloadbcm5719-llvm-7b4826e6cec09e308dda43f6c4d1dc25d0905b7a.tar.gz
bcm5719-llvm-7b4826e6cec09e308dda43f6c4d1dc25d0905b7a.zip
AMDGPU: Use better alignment for kernarg lowering
This was just emitting loads with the ABI alignment for the raw type. The true alignment is often better, especially when an illegal vector type was scalarized. The better alignment allows using a scalar load more often. llvm-svn: 333558
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp37
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h2
2 files changed, 22 insertions, 17 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4519b987b55..cc326cdce88 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1068,15 +1068,12 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT,
SDValue SITargetLowering::lowerKernargMemParameter(
SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
- uint64_t Offset, bool Signed,
+ uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg) const {
- const DataLayout &DL = DAG.getDataLayout();
Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
- unsigned Align = DL.getABITypeAlignment(Ty);
-
SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset);
SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
MachineMemOperand::MODereferenceable |
@@ -1663,7 +1660,15 @@ SDValue SITargetLowering::LowerFormalArguments(
SmallVector<SDValue, 16> Chains;
- for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
+ // FIXME: This is the minimum kernel argument alignment. We should improve
+ // this to the maximum alignment of the arguments.
+ //
+ // FIXME: Alignment of explicit arguments totally broken with non-0 explicit
+ // kern arg offset.
+ const unsigned KernelArgBaseAlign = 16;
+ const unsigned ExplicitOffset = Subtarget->getExplicitKernelArgOffset(Fn);
+
+ for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
const ISD::InputArg &Arg = Ins[i];
if (Skipped[i]) {
InVals.push_back(DAG.getUNDEF(Arg.VT));
@@ -1677,14 +1682,14 @@ SDValue SITargetLowering::LowerFormalArguments(
VT = Ins[i].VT;
EVT MemVT = VA.getLocVT();
- const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(Fn) +
- VA.getLocMemOffset();
+ const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset();
Info->setABIArgOffset(Offset + MemVT.getStoreSize());
+ unsigned Align = MinAlign(KernelArgBaseAlign, Offset);
// The first 36 bytes of the input buffer contains information about
- // thread group and global sizes.
+ // thread group and global sizes for clover.
SDValue Arg = lowerKernargMemParameter(
- DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]);
+ DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]);
Chains.push_back(Arg.getValue(1));
auto *ParamTy =
@@ -4303,7 +4308,7 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG,
unsigned Offset) const {
SDLoc SL(Op);
SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL,
- DAG.getEntryNode(), Offset, false);
+ DAG.getEntryNode(), Offset, 4, false);
// The local size values will have the hi 16-bits as zero.
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param,
DAG.getValueType(VT));
@@ -4404,37 +4409,37 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_X, false);
+ SI::KernelInputOffsets::NGROUPS_X, 4, false);
case Intrinsic::r600_read_ngroups_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Y, false);
+ SI::KernelInputOffsets::NGROUPS_Y, 4, false);
case Intrinsic::r600_read_ngroups_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::NGROUPS_Z, false);
+ SI::KernelInputOffsets::NGROUPS_Z, 4, false);
case Intrinsic::r600_read_global_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_X, 4, false);
case Intrinsic::r600_read_global_size_y:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Y, 4, false);
case Intrinsic::r600_read_global_size_z:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
+ SI::KernelInputOffsets::GLOBAL_SIZE_Z, 4, false);
case Intrinsic::r600_read_local_size_x:
if (Subtarget->isAmdHsaOS())
return emitNonHSAIntrinsicError(DAG, DL, VT);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 15785761aa9..3fdd158a50d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -27,7 +27,7 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
SDValue lowerKernargMemParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
- uint64_t Offset, bool Signed,
+ uint64_t Offset, unsigned Align, bool Signed,
const ISD::InputArg *Arg = nullptr) const;
SDValue lowerStackParameter(SelectionDAG &DAG, CCValAssign &VA,
OpenPOWER on IntegriCloud