diff options
| author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-10-25 12:40:16 -0700 |
|---|---|---|
| committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-10-25 15:08:30 -0700 |
| commit | 4c0251da149c99f49550d6c938e6e7f45075194d (patch) | |
| tree | 891140199f07abefb60d78aed89bfde89f5945d9 /llvm | |
| parent | 27887bc1e7a19bc568db775903d8febdeab2f617 (diff) | |
| download | bcm5719-llvm-4c0251da149c99f49550d6c938e6e7f45075194d.tar.gz bcm5719-llvm-4c0251da149c99f49550d6c938e6e7f45075194d.zip | |
[AMDGPU] Enable SGPR copy folding
This used to fail in the last test case function because, after
%0:sreg_64.sub0 was folded into the %3:sreg_32_xm0_xexec COPY, it
was further folded into S_STORE_DWORD_IMM. Its legal effective
subreg class is SReg_32, while the instruction expects the more restricted
SReg_32_XM0_XEXEC. However, SIInstrInfo::isLegalRegOperand()
passed the legality check, and the problem was caught in the verifier.
This change borrows code from the verifier to check for RC legality.
Differential Revision: https://reviews.llvm.org/D69445
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir | 48 |
3 files changed, 59 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index f24510b1866..c28ff5ef6a0 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -668,7 +668,6 @@ void SIFoldOperands::foldOperand( } else { if (UseMI->isCopy() && OpToFold.isReg() && UseMI->getOperand(0).getReg().isVirtual() && - TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) && !UseMI->getOperand(1).getSubReg()) { LLVM_DEBUG(dbgs() << "Folding " << OpToFold << "\n into " << *UseMI << '\n'); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6e914330542..272a7fc442d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3921,20 +3921,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI, ? MRI.getRegClass(Reg) : RI.getPhysRegClass(Reg); - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo()); - RC = TRI->getSubRegClass(RC, MO.getSubReg()); - - // In order to be legal, the common sub-class must be equal to the - // class of the current operand. 
For example: - // - // v_mov_b32 s0 ; Operand defined as vsrc_b32 - // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL - // - // s_sendmsg 0, s0 ; Operand defined as m0reg - // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL + const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass); + if (MO.getSubReg()) { + const MachineFunction *MF = MO.getParent()->getParent()->getParent(); + const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF); + if (!SuperRC) + return false; - return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC; + DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg()); + if (!DRC) + return false; + } + return RC->hasSuperClassEq(DRC); } bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI, diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir new file mode 100644 index 00000000000..a33c97c4f5b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir @@ -0,0 +1,48 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- + +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full +# GCN: %0:sgpr_32 = IMPLICIT_DEF +# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_full +body: | + bb.0: + + %0:sgpr_32 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0 + %2:sgpr_32 = COPY %1 + S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 +... + +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg +# GCN: %0:sreg_64 = IMPLICIT_DEF +# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0 +# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_subreg +body: | + bb.0: + + %0:sreg_64 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0.sub0 + %2:sgpr_32 = COPY %1 + S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0 +... 
+ +# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2 +# GCN: %0:sreg_64 = IMPLICIT_DEF +# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0 +# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0 + +name: fold_sgpr_to_sgpr_copy_subreg2 +body: | + bb.0: + + %0:sreg_64 = IMPLICIT_DEF + %1:sgpr_32 = COPY %0.sub0 + %2:sgpr_32 = COPY %1 + %3:sreg_32_xm0_xexec = COPY %2 + S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0 +... |

