summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 24
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir | 48
3 files changed, 59 insertions, 14 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index f24510b1866..c28ff5ef6a0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -668,7 +668,6 @@ void SIFoldOperands::foldOperand(
} else {
if (UseMI->isCopy() && OpToFold.isReg() &&
UseMI->getOperand(0).getReg().isVirtual() &&
- TRI->isVectorRegister(*MRI, UseMI->getOperand(0).getReg()) &&
!UseMI->getOperand(1).getSubReg()) {
LLVM_DEBUG(dbgs() << "Folding " << OpToFold
<< "\n into " << *UseMI << '\n');
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 6e914330542..272a7fc442d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3921,20 +3921,18 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
? MRI.getRegClass(Reg)
: RI.getPhysRegClass(Reg);
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
- RC = TRI->getSubRegClass(RC, MO.getSubReg());
-
- // In order to be legal, the common sub-class must be equal to the
- // class of the current operand. For example:
- //
- // v_mov_b32 s0 ; Operand defined as vsrc_b32
- // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
- //
- // s_sendmsg 0, s0 ; Operand defined as m0reg
- // ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
+ const TargetRegisterClass *DRC = RI.getRegClass(OpInfo.RegClass);
+ if (MO.getSubReg()) {
+ const MachineFunction *MF = MO.getParent()->getParent()->getParent();
+ const TargetRegisterClass *SuperRC = RI.getLargestLegalSuperClass(RC, *MF);
+ if (!SuperRC)
+ return false;
- return RI.getCommonSubClass(RC, RI.getRegClass(OpInfo.RegClass)) == RC;
+ DRC = RI.getMatchingSuperRegClass(SuperRC, DRC, MO.getSubReg());
+ if (!DRC)
+ return false;
+ }
+ return RC->hasSuperClassEq(DRC);
}
bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
diff --git a/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
new file mode 100644
index 00000000000..a33c97c4f5b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-sgpr-copy.mir
@@ -0,0 +1,48 @@
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fold-operands,dead-mi-elimination -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_full
+# GCN: %0:sgpr_32 = IMPLICIT_DEF
+# GCN-NEXT: S_STORE_DWORD_IMM %0, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_full
+body: |
+ bb.0:
+
+ %0:sgpr_32 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0
+ %2:sgpr_32 = COPY %1
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg
+# GCN: %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2:sgpr_32 = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_subreg
+body: |
+ bb.0:
+
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0.sub0
+ %2:sgpr_32 = COPY %1
+ S_STORE_DWORD_IMM %2, undef $sgpr10_sgpr11, 0, 0, 0
+...
+
+# GCN-LABEL: name: fold_sgpr_to_sgpr_copy_subreg2
+# GCN: %0:sreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %3:sreg_32_xm0_xexec = COPY %0.sub0
+# GCN-NEXT: S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+
+name: fold_sgpr_to_sgpr_copy_subreg2
+body: |
+ bb.0:
+
+ %0:sreg_64 = IMPLICIT_DEF
+ %1:sgpr_32 = COPY %0.sub0
+ %2:sgpr_32 = COPY %1
+ %3:sreg_32_xm0_xexec = COPY %2
+ S_STORE_DWORD_IMM %3, undef $sgpr10_sgpr11, 0, 0, 0
+...
OpenPOWER on IntegriCloud