diff options
| field | value | date |
|---|---|---|
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 14:29:30 +0000 | 
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2019-07-19 14:29:30 +0000 | 
| commit | b60a2ae40e73bce69f8dbf120f14e3491b70c17f (patch) | |
| tree | d8031d5c08c5ddcb126b7aeb94aa05c2675110c0 | |
| parent | fecf43eba3630aeb55c56e5d308f99a7bd05bfbe (diff) | |
| download | bcm5719-llvm-b60a2ae40e73bce69f8dbf120f14e3491b70c17f.tar.gz bcm5719-llvm-b60a2ae40e73bce69f8dbf120f14e3491b70c17f.zip  | |
AMDGPU/GlobalISel: Support arguments with multiple registers
Handles structs used directly in argument lists.
llvm-svn: 366584
4 files changed, 89 insertions, 42 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index a118743f4d8..8fc55cba494 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -152,33 +152,45 @@ void AMDGPUCallLowering::splitToValueTypes(    SmallVector<EVT, 4> SplitVTs;    ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs); -  EVT VT = SplitVTs[0]; -  unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); - -  if (NumParts == 1) { -    // No splitting to do, but we want to replace the original type (e.g. [1 x -    // double] -> double). -    SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx), -                           OrigArg.Flags, OrigArg.IsFixed); -    return; -  } +  assert(OrigArg.Regs.size() == SplitVTs.size()); -  LLT LLTy = getLLTForType(*OrigArg.Ty, DL); -  SmallVector<Register, 8> SplitRegs; +  int SplitIdx = 0; +  for (EVT VT : SplitVTs) { +    unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT); +    Type *Ty = VT.getTypeForEVT(Ctx); -  EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); -  Type *PartTy = PartVT.getTypeForEVT(Ctx); -  LLT PartLLT = getLLTForType(*PartTy, DL); -  // FIXME: Should we be reporting all of the part registers for a single -  // argument, and let handleAssignments take care of the repacking? -  for (unsigned i = 0; i < NumParts; ++i) { -    Register PartReg = MRI.createGenericVirtualRegister(PartLLT); -    SplitRegs.push_back(PartReg); -    SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags); -  } -  PerformArgSplit(SplitRegs, LLTy, PartLLT); +    if (NumParts == 1) { +      // No splitting to do, but we want to replace the original type (e.g. [1 x +      // double] -> double). 
+      SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty, +                             OrigArg.Flags, OrigArg.IsFixed); + +      ++SplitIdx; +      continue; +    } + +    LLT LLTy = getLLTForType(*Ty, DL); + +    SmallVector<Register, 8> SplitRegs; + +    EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT); +    Type *PartTy = PartVT.getTypeForEVT(Ctx); +    LLT PartLLT = getLLTForType(*PartTy, DL); + +    // FIXME: Should we be reporting all of the part registers for a single +    // argument, and let handleAssignments take care of the repacking? +    for (unsigned i = 0; i < NumParts; ++i) { +      Register PartReg = MRI.createGenericVirtualRegister(PartLLT); +      SplitRegs.push_back(PartReg); +      SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags); +    } + +    PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx); + +    ++SplitIdx; +  }  }  bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -485,11 +497,11 @@ bool AMDGPUCallLowering::lowerFormalArguments(      if (!IsShader && InReg)        return false; -    // TODO: Handle multiple registers and sret. +    // TODO: Handle sret.      
if (Arg.hasAttribute(Attribute::StructRet) ||          Arg.hasAttribute(Attribute::SwiftSelf) ||          Arg.hasAttribute(Attribute::SwiftError) || -        Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1) +        Arg.hasAttribute(Attribute::Nest))        return false;      if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) { @@ -505,7 +517,9 @@ bool AMDGPUCallLowering::lowerFormalArguments(        ++PSInputNum;        if (SkipArg) { -        MIRBuilder.buildUndef(VRegs[Idx][0]); +        for (int I = 0, E = VRegs[Idx].size(); I != E; ++I) +          MIRBuilder.buildUndef(VRegs[Idx][I]); +          ++Idx;          continue;        } @@ -513,11 +527,14 @@ bool AMDGPUCallLowering::lowerFormalArguments(      ArgInfo OrigArg(VRegs[Idx], Arg.getType());      setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F); -    splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC, + +    splitToValueTypes( +      OrigArg, SplitArgs, DL, MRI, CC,        // FIXME: We should probably be passing multiple registers to        // handleAssignments to do this -      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) { -        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT); +      [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) { +        packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs, +                                LLTy, PartLLT);        });      ++Idx; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h index 97f8c85b9e7..bc345b6c659 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -30,7 +30,7 @@ class AMDGPUCallLowering: public CallLowering {                        Register DstReg) const;    /// A function of this type is used to perform value split action. 
-  using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT)>; +  using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;    void splitToValueTypes(const ArgInfo &OrigArgInfo,                           SmallVectorImpl<ArgInfo> &SplitArgs, diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll index 0d11aa99395..e5e8b6840d2 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll @@ -1,10 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py  ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s  ; Check that we correctly skip over disabled inputs -; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2 -; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0 -; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32) -define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 { +define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 { +  ; CHECK-LABEL: name: disabled_input +  ; CHECK: bb.1.main_body: +  ; CHECK:   liveins: $sgpr2, $vgpr0 +  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 +  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 +  ; CHECK:   [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) +  ; CHECK:   S_ENDPGM 0 +main_body: +  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, 
float %psinput1, i1 false, i1 false) #0 +  ret void +} + +define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 { +  ; CHECK-LABEL: name: disabled_input_struct +  ; CHECK: bb.1.main_body: +  ; CHECK:   liveins: $sgpr2, $vgpr0 +  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2 +  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +  ; CHECK:   [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF +  ; CHECK:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 +  ; CHECK:   [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 +  ; CHECK:   [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false +  ; CHECK:   G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1) +  ; CHECK:   S_ENDPGM 0  main_body:    call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0    ret void diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll index cf08f695c72..dcee8eaf799 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll @@ -1,10 +1,5 @@  ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o - %s 2> %t  | FileCheck %s -; RUN: FileCheck -check-prefix=ERR %s < %t - -; ERR-NOT: remark -; ERR: remark: <unknown>:0:0: unable to lower arguments: void ({ i8, i32 })* (in function: void_func_struct_i8_i32) -; ERR-NOT: remark +; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s  define void @void_func_i1(i1 %arg0) #0 {  
  ; CHECK-LABEL: name: void_func_i1 @@ -1138,9 +1133,17 @@ define void @void_func_struct_i32({ i32 } %arg0) #0 {  define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {    ; CHECK-LABEL: name: void_func_struct_i8_i32 -  ; CHECK: bb.0: -  ; CHECK:   successors: %bb.1(0x80000000)    ; CHECK: bb.1 (%ir-block.0): +  ; CHECK:   liveins: $vgpr0, $vgpr1 +  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 +  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) +  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 +  ; CHECK:   [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF +  ; CHECK:   G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1) +  ; CHECK:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 +  ; CHECK:   [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64) +  ; CHECK:   G_STORE [[COPY1]](s32), [[GEP]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1) +  ; CHECK:   S_ENDPGM 0    store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef    ret void  }  | 

