diff options
| author | Tom Stellard <tstellar@redhat.com> | 2019-04-09 02:26:03 +0000 |
|---|---|---|
| committer | Tom Stellard <tstellar@redhat.com> | 2019-04-09 02:26:03 +0000 |
| commit | 206b9927f8362fe0a65151f76ec67caffe39d3ad (patch) | |
| tree | ea759146c3829f3ac4a7817407b09c39fb815806 | |
| parent | b78094abcf526e78c14867e94c3fe1d2ad666590 (diff) | |
| download | bcm5719-llvm-206b9927f8362fe0a65151f76ec67caffe39d3ad.tar.gz bcm5719-llvm-206b9927f8362fe0a65151f76ec67caffe39d3ad.zip | |
AMDGPU/GlobalISel: Implement call lowering for shaders returning values
Reviewers: arsenm, nhaehnle
Subscribers: kzhuravl, jvesely, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, volkan, llvm-commits
Differential Revision: https://reviews.llvm.org/D57166
llvm-svn: 357964
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 76 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll | 21 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll | 10 |
3 files changed, 94 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index 4045b18be36..249498e35ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -20,12 +20,49 @@ #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/LowLevelTypeImpl.h" using namespace llvm; +namespace { + +struct OutgoingArgHandler : public CallLowering::ValueHandler { + OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstrBuilder MIB, CCAssignFn *AssignFn) + : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB) {} + + MachineInstrBuilder MIB; + + unsigned getStackAddress(uint64_t Size, int64_t Offset, + MachinePointerInfo &MPO) override { + llvm_unreachable("not implemented"); + } + + void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size, + MachinePointerInfo &MPO, CCValAssign &VA) override { + llvm_unreachable("not implemented"); + } + + void assignValueToReg(unsigned ValVReg, unsigned PhysReg, + CCValAssign &VA) override { + MIB.addUse(PhysReg); + MIRBuilder.buildCopy(PhysReg, ValVReg); + } + + bool assignArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + const CallLowering::ArgInfo &Info, + CCState &State) override { + return AssignFn(ValNo, ValVT, LocVT, LocInfo, Info.Flags, State); + } +}; + +} + AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) : CallLowering(&TLI) { } @@ -33,11 +70,44 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val, ArrayRef<unsigned> VRegs) const { - // FIXME: Add support for non-void returns. 
- if (Val) + + MachineFunction &MF = MIRBuilder.getMF(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + MFI->setIfReturnsVoid(!Val); + + if (!Val) { + MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0); + return true; + } + + unsigned VReg = VRegs[0]; + + const Function &F = MF.getFunction(); + auto &DL = F.getParent()->getDataLayout(); + if (!AMDGPU::isShader(F.getCallingConv())) + return false; + + + const AMDGPUTargetLowering &TLI = *getTLI<AMDGPUTargetLowering>(); + SmallVector<EVT, 4> SplitVTs; + SmallVector<uint64_t, 4> Offsets; + ArgInfo OrigArg{VReg, Val->getType()}; + setArgFlags(OrigArg, AttributeList::ReturnIndex, DL, F); + ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0); + + SmallVector<ArgInfo, 8> SplitArgs; + CCAssignFn *AssignFn = CCAssignFnForReturn(F.getCallingConv(), false); + for (unsigned i = 0, e = Offsets.size(); i != e; ++i) { + Type *SplitTy = SplitVTs[i].getTypeForEVT(F.getContext()); + SplitArgs.push_back({VRegs[i], SplitTy, OrigArg.Flags, OrigArg.IsFixed}); + } + auto RetInstr = MIRBuilder.buildInstrNoInsert(AMDGPU::SI_RETURN_TO_EPILOG); + OutgoingArgHandler Handler(MIRBuilder, MRI, RetInstr, AssignFn); + if (!handleAssignments(MIRBuilder, SplitArgs, Handler)) return false; + MIRBuilder.insertInstr(RetInstr); - MIRBuilder.buildInstr(AMDGPU::S_ENDPGM).addImm(0); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll index 00efe521516..8d87ce1ddb1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_vs.ll @@ -55,6 +55,27 @@ define amdgpu_vs void @test_order(float inreg %arg0, float inreg %arg1, float %a ret void } +; CHECK-LABEL: name: ret_struct +; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr0 +; CHECK: [[S1:%[0-9]+]]:_(s32) = COPY $sgpr1 +; CHECK: $sgpr0 = COPY [[S0]] +; CHECK: 
$sgpr1 = COPY [[S1]] +; CHECK: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1 +define amdgpu_vs <{ i32, i32 }> @ret_struct(i32 inreg %arg0, i32 inreg %arg1) { +main_body: + %tmp0 = insertvalue <{ i32, i32 }> undef, i32 %arg0, 0 + %tmp1 = insertvalue <{ i32, i32 }> %tmp0, i32 %arg1, 1 + ret <{ i32, i32 }> %tmp1 +} + +; CHECK-LABEL: name: non_void_ret +; CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +; CHECK: $sgpr0 = COPY [[ZERO]] +; CHECK: SI_RETURN_TO_EPILOG $sgpr0 +define amdgpu_vs i32 @non_void_ret() { + ret i32 0 +} + declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll deleted file mode 100644 index b721f59afe6..00000000000 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/todo.ll +++ /dev/null @@ -1,10 +0,0 @@ -; RUN: llc -mtriple=amdgcn-mesa-mesa3d -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s - -; This isn't implemented, but we need to make sure we fall back to SelectionDAG -; instead of generating wrong code. -; CHECK: warning: Instruction selection used fallback path for non_void_ret -; CHECK: non_void_ret: -; CHECK-NOT: s_endpgm -define amdgpu_vs i32 @non_void_ret() { - ret i32 0 -} |

