diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 37 |
1 file changed, 21 insertions, 16 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 4519b987b55..cc326cdce88 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1068,15 +1068,12 @@ SDValue SITargetLowering::convertArgType(SelectionDAG &DAG, EVT VT, EVT MemVT, SDValue SITargetLowering::lowerKernargMemParameter( SelectionDAG &DAG, EVT VT, EVT MemVT, const SDLoc &SL, SDValue Chain, - uint64_t Offset, bool Signed, + uint64_t Offset, unsigned Align, bool Signed, const ISD::InputArg *Arg) const { - const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); - unsigned Align = DL.getABITypeAlignment(Ty); - SDValue Ptr = lowerKernArgParameterPtr(DAG, SL, Chain, Offset); SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align, MachineMemOperand::MODereferenceable | @@ -1663,7 +1660,15 @@ SDValue SITargetLowering::LowerFormalArguments( SmallVector<SDValue, 16> Chains; - for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { + // FIXME: This is the minimum kernel argument alignment. We should improve + // this to the maximum alignment of the arguments. + // + // FIXME: Alignment of explicit arguments totally broken with non-0 explicit + // kern arg offset. 
+ const unsigned KernelArgBaseAlign = 16; + const unsigned ExplicitOffset = Subtarget->getExplicitKernelArgOffset(Fn); + + for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { const ISD::InputArg &Arg = Ins[i]; if (Skipped[i]) { InVals.push_back(DAG.getUNDEF(Arg.VT)); @@ -1677,14 +1682,14 @@ SDValue SITargetLowering::LowerFormalArguments( VT = Ins[i].VT; EVT MemVT = VA.getLocVT(); - const uint64_t Offset = Subtarget->getExplicitKernelArgOffset(Fn) + - VA.getLocMemOffset(); + const uint64_t Offset = ExplicitOffset + VA.getLocMemOffset(); Info->setABIArgOffset(Offset + MemVT.getStoreSize()); + unsigned Align = MinAlign(KernelArgBaseAlign, Offset); // The first 36 bytes of the input buffer contains information about - // thread group and global sizes. + // thread group and global sizes for clover. SDValue Arg = lowerKernargMemParameter( - DAG, VT, MemVT, DL, Chain, Offset, Ins[i].Flags.isSExt(), &Ins[i]); + DAG, VT, MemVT, DL, Chain, Offset, Align, Ins[i].Flags.isSExt(), &Ins[i]); Chains.push_back(Arg.getValue(1)); auto *ParamTy = @@ -4303,7 +4308,7 @@ SDValue SITargetLowering::lowerImplicitZextParam(SelectionDAG &DAG, unsigned Offset) const { SDLoc SL(Op); SDValue Param = lowerKernargMemParameter(DAG, MVT::i32, MVT::i32, SL, - DAG.getEntryNode(), Offset, false); + DAG.getEntryNode(), Offset, 4, false); // The local size values will have the hi 16-bits as zero. 
return DAG.getNode(ISD::AssertZext, SL, MVT::i32, Param, DAG.getValueType(VT)); @@ -4404,37 +4409,37 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::NGROUPS_X, false); + SI::KernelInputOffsets::NGROUPS_X, 4, false); case Intrinsic::r600_read_ngroups_y: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::NGROUPS_Y, false); + SI::KernelInputOffsets::NGROUPS_Y, 4, false); case Intrinsic::r600_read_ngroups_z: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::NGROUPS_Z, false); + SI::KernelInputOffsets::NGROUPS_Z, 4, false); case Intrinsic::r600_read_global_size_x: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::GLOBAL_SIZE_X, false); + SI::KernelInputOffsets::GLOBAL_SIZE_X, 4, false); case Intrinsic::r600_read_global_size_y: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::GLOBAL_SIZE_Y, false); + SI::KernelInputOffsets::GLOBAL_SIZE_Y, 4, false); case Intrinsic::r600_read_global_size_z: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); return lowerKernargMemParameter(DAG, VT, VT, DL, DAG.getEntryNode(), - SI::KernelInputOffsets::GLOBAL_SIZE_Z, false); + SI::KernelInputOffsets::GLOBAL_SIZE_Z, 4, false); case Intrinsic::r600_read_local_size_x: if (Subtarget->isAmdHsaOS()) return emitNonHSAIntrinsicError(DAG, DL, VT); |