diff options
| author | Jan Vesely <jan.vesely@rutgers.edu> | 2014-10-14 18:52:07 +0000 |
|---|---|---|
| committer | Jan Vesely <jan.vesely@rutgers.edu> | 2014-10-14 18:52:07 +0000 |
| commit | 86187d231aab058dccb00a17ecece2ac0b0998ec (patch) | |
| tree | 5de7758382357d37fab7eb20bfcbbffe18fa8d23 /llvm/lib | |
| parent | df19696374ca30f66354c460c155ad066d64c722 (diff) | |
| download | bcm5719-llvm-86187d231aab058dccb00a17ecece2ac0b0998ec.tar.gz bcm5719-llvm-86187d231aab058dccb00a17ecece2ac0b0998ec.zip | |
R600: Add new intrinsic to read work dimensions
v2: Add SI lowering
Add test
v3: Place work dimensions after the kernel arguments.
v4: Calculate offset while lowering arguments
v5: rebase
v6: change prefix to AMDGPU
Reviewed-by: Tom Stellard <tom@stellard.net>
Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
llvm-svn: 219705
Diffstat (limited to 'llvm/lib')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/R600/AMDGPUMachineFunction.h | 3 |
| -rw-r--r-- | llvm/lib/Target/R600/R600ISelLowering.cpp | 11 |
| -rw-r--r-- | llvm/lib/Target/R600/SIISelLowering.cpp | 11 |

3 files changed, 20 insertions, 5 deletions
```diff
diff --git a/llvm/lib/Target/R600/AMDGPUMachineFunction.h b/llvm/lib/Target/R600/AMDGPUMachineFunction.h
index 886fb1b1fc3..f5e4694e76f 100644
--- a/llvm/lib/Target/R600/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/R600/AMDGPUMachineFunction.h
@@ -30,6 +30,9 @@ public:
   /// Number of bytes in the LDS that are being used.
   unsigned LDSSize;
 
+  /// Start of implicit kernel args
+  unsigned ABIArgOffset;
+
   unsigned getShaderType() const {
     return ShaderType;
   }
diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp
index 87610e9cc55..dfc0eb1d203 100644
--- a/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ b/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -809,6 +809,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
     case Intrinsic::r600_read_local_size_z:
       return LowerImplicitParameter(DAG, VT, DL, 8);
 
+    case Intrinsic::AMDGPU_read_workdim:
+      return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+
     case Intrinsic::r600_read_tgid_x:
       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                   AMDGPU::T1_X, VT);
@@ -1698,7 +1701,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                  *DAG.getContext());
   MachineFunction &MF = DAG.getMachineFunction();
-  unsigned ShaderType = MF.getInfo<R600MachineFunctionInfo>()->getShaderType();
+  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
 
   SmallVector<ISD::InputArg, 8> LocalIns;
 
@@ -1716,7 +1719,7 @@ SDValue R600TargetLowering::LowerFormalArguments(
       MemVT = MemVT.getVectorElementType();
     }
 
-    if (ShaderType != ShaderType::COMPUTE) {
+    if (MFI->getShaderType() != ShaderType::COMPUTE) {
       unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
       SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
       InVals.push_back(Register);
@@ -1748,16 +1751,18 @@ SDValue R600TargetLowering::LowerFormalArguments(
     unsigned ValBase =
         ArgLocs[In.OrigArgIndex].getLocMemOffset();
     unsigned PartOffset = VA.getLocMemOffset();
+    unsigned Offset = 36 + VA.getLocMemOffset();
 
     MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
     SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
-                              DAG.getConstant(36 + PartOffset, MVT::i32),
+                              DAG.getConstant(Offset, MVT::i32),
                               DAG.getUNDEF(MVT::i32),
                               PtrInfo,
                               MemVT, false, true, true, 4);
 
     // 4 is the preferred alignment for the CONSTANT memory space.
     InVals.push_back(Arg);
+    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
   }
   return Chain;
 }
diff --git a/llvm/lib/Target/R600/SIISelLowering.cpp b/llvm/lib/Target/R600/SIISelLowering.cpp
index 49ac269998e..28881955156 100644
--- a/llvm/lib/Target/R600/SIISelLowering.cpp
+++ b/llvm/lib/Target/R600/SIISelLowering.cpp
@@ -519,11 +519,11 @@ SDValue SITargetLowering::LowerFormalArguments(
     if (VA.isMemLoc()) {
       VT = Ins[i].VT;
       EVT MemVT = Splits[i].VT;
+      const unsigned Offset = 36 + VA.getLocMemOffset();
       // The first 36 bytes of the input buffer contains information about
       // thread group and global sizes.
       SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(),
-                                   36 + VA.getLocMemOffset(),
-                                   Ins[i].Flags.isSExt());
+                                   Offset, Ins[i].Flags.isSExt());
 
       const PointerType *ParamTy =
         dyn_cast<PointerType>(FType->getParamType(Ins[i].OrigArgIndex));
@@ -537,6 +537,7 @@ SDValue SITargetLowering::LowerFormalArguments(
       }
 
       InVals.push_back(Arg);
+      Info->ABIArgOffset = Offset + MemVT.getStoreSize();
       continue;
     }
     assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -927,6 +928,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::r600_read_local_size_z:
     return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
                           SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
+
+  case Intrinsic::AMDGPU_read_workdim:
+    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+                          false);
+
   case Intrinsic::r600_read_tgid_x:
     return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
       TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
```

