summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 19:16:26 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 19:16:26 +0000
commit538b73b7976c83e0224239b14aa1354e5d57138d (patch)
tree1dbd2684f42b1968897d54bb4ba7c0341a10bee7
parent4bcdcad91bc6548790c95e9f9c3ca062515518ea (diff)
downloadbcm5719-llvm-538b73b7976c83e0224239b14aa1354e5d57138d.tar.gz
bcm5719-llvm-538b73b7976c83e0224239b14aa1354e5d57138d.zip
AMDGPU/GlobalISel: Handle more G_INSERT cases
Start manually writing a table to get the subreg index. TableGen should probably generate this, but I'm not sure what it looks like in the arbitrary case where subregisters are allowed to not fully cover the super-registers. llvm-svn: 373947
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp44
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp66
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h2
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir150
4 files changed, 185 insertions, 77 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index aa165d4ce21..afdeacc4291 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -555,48 +555,6 @@ bool AMDGPUInstructionSelector::selectG_IMPLICIT_DEF(MachineInstr &I) const {
return false;
}
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
-static unsigned getSubRegForSizeAndOffset(const SIRegisterInfo &TRI,
- unsigned Size, unsigned Offset) {
- switch (Size) {
- case 32:
- return TRI.getSubRegFromChannel(Offset / 32);
- case 64: {
- switch (Offset) {
- case 0:
- return AMDGPU::sub0_sub1;
- case 32:
- return AMDGPU::sub1_sub2;
- case 64:
- return AMDGPU::sub2_sub3;
- case 96:
- return AMDGPU::sub4_sub5;
- case 128:
- return AMDGPU::sub5_sub6;
- case 160:
- return AMDGPU::sub7_sub8;
- // FIXME: Missing cases up to 1024 bits
- default:
- return AMDGPU::NoSubRegister;
- }
- }
- case 96: {
- switch (Offset) {
- case 0:
- return AMDGPU::sub0_sub1_sub2;
- case 32:
- return AMDGPU::sub1_sub2_sub3;
- case 64:
- return AMDGPU::sub2_sub3_sub4;
- }
- }
- default:
- return AMDGPU::NoSubRegister;
- }
-}
-
bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
MachineBasicBlock *BB = I.getParent();
@@ -612,7 +570,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const {
if (Offset % 32 != 0)
return false;
- unsigned SubReg = getSubRegForSizeAndOffset(TRI, InsSize, Offset);
+ unsigned SubReg = TRI.getSubRegFromChannel(Offset / 32, InsSize / 32);
if (SubReg == AMDGPU::NoSubRegister)
return false;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
index 7cffdf1a4dc..9806e6b0714 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp
@@ -26,19 +26,59 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {}
// they are not supported at this time.
//===----------------------------------------------------------------------===//
-unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel) {
- static const unsigned SubRegs[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4,
- AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, AMDGPU::sub8, AMDGPU::sub9,
- AMDGPU::sub10, AMDGPU::sub11, AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14,
- AMDGPU::sub15, AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, AMDGPU::sub24,
- AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, AMDGPU::sub28, AMDGPU::sub29,
- AMDGPU::sub30, AMDGPU::sub31
- };
-
- assert(Channel < array_lengthof(SubRegs));
- return SubRegs[Channel];
+// Table of NumRegs sized pieces at every 32-bit offset.
+static const uint16_t SubRegFromChannelTable[][32] = {
+ { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
+ AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
+ AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
+ AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31
+ },
+ {
+ AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4,
+ AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8,
+ AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
+ AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16,
+ AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20,
+ AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
+ AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28,
+ AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister
+ },
+ {
+ AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
+ AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
+ AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
+ AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
+ AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
+ AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
+ AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
+ AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+ },
+ {
+ AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
+ AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
+ AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
+ AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
+ AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
+ AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
+ AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
+ AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister
+ }
+};
+
+// FIXME: TableGen should generate something to make this manageable for all
+// register classes. At a minimum we could use the opposite of
+// composeSubRegIndices and go up from the base 32-bit subreg.
+unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) {
+ const unsigned NumRegIndex = NumRegs - 1;
+
+ assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
+ "Not implemented");
+ assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
+ return SubRegFromChannelTable[NumRegIndex][Channel];
}
void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
index 3453a8c1b0b..9e713ca804a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h
@@ -28,7 +28,7 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
/// \returns the sub reg enum value for the given \p Channel
/// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0)
- static unsigned getSubRegFromChannel(unsigned Channel);
+ static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1);
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
};
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
index 3cd1b463b57..c120c961741 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-insert.mir
@@ -303,41 +303,46 @@ body: |
---
-name: insert_s_s256_s_s64_96
+name: insert_s_v256_v_s64_96
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9
+ ; CHECK-LABEL: name: insert_s_v256_v_s64_96
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr8_vgpr9
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:vgpr(s64) = COPY $vgpr8_vgpr9
+ %2:vgpr(s256) = G_INSERT %0, %1, 96
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_s_s256_s_s64_128
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
- ; CHECK-LABEL: name: insert_s_s256_s_s64_96
+ ; CHECK-LABEL: name: insert_s_s256_s_s64_128
; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
- ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_64_xexec = COPY $sgpr4_sgpr5
; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5
; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
%0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
- %1:sgpr(s64) = COPY $sgpr8_sgpr9
- %2:sgpr(s256) = G_INSERT %0, %1, 96
+ %1:sgpr(s64) = COPY $sgpr4_sgpr5
+ %2:sgpr(s256) = G_INSERT %0, %1, 128
S_ENDPGM 0, implicit %2
...
# ---
-# name: insert_s_s256_s_s64_128
-# legalized: true
-# regBankSelected: true
-
-# body: |
-# bb.0:
-# liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9
-# %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
-# %1:sgpr(s64) = COPY $sgpr4_sgpr5
-# %2:sgpr(s256) = G_INSERT %0, %1, 128
-# S_ENDPGM 0, implicit %2
-# ...
-
-# ---
-
# name: insert_s_s256_s_s64_160
# legalized: true
# regBankSelected: true
@@ -450,3 +455,108 @@ body: |
%2:sgpr(s160) = G_INSERT %0, %1, 64
S_ENDPGM 0, implicit %2
...
+
+---
+
+name: insert_s_s256_s_s128_0
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11
+
+ ; CHECK-LABEL: name: insert_s_s256_s_s128_0
+ ; CHECK: [[COPY:%[0-9]+]]:sreg_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:sreg_128 = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:sreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub0_sub1_sub2_sub3
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:sgpr(s256) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7
+ %1:sgpr(s128) = COPY $sgpr8_sgpr9_sgpr10_sgpr11
+ %2:sgpr(s256) = G_INSERT %0, %1, 0
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s256_v_s128_32
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+ ; CHECK-LABEL: name: insert_v_s256_v_s128_32
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub1_sub2_sub3_sub4
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ %2:vgpr(s256) = G_INSERT %0, %1, 32
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s256_v_s128_64
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+ ; CHECK-LABEL: name: insert_v_s256_v_s128_64
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub2_sub3_sub4_sub5
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ %2:vgpr(s256) = G_INSERT %0, %1, 64
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s256_v_s128_96
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+ ; CHECK-LABEL: name: insert_v_s256_v_s128_96
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub3_sub4_sub5_sub6
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ %2:vgpr(s256) = G_INSERT %0, %1, 96
+ S_ENDPGM 0, implicit %2
+...
+
+---
+
+name: insert_v_s256_v_s128_128
+legalized: true
+regBankSelected: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11
+
+ ; CHECK-LABEL: name: insert_v_s256_v_s128_128
+ ; CHECK: [[COPY:%[0-9]+]]:vreg_256 = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ ; CHECK: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ ; CHECK: [[INSERT_SUBREG:%[0-9]+]]:vreg_256 = INSERT_SUBREG [[COPY]], [[COPY1]], %subreg.sub4_sub5_sub6_sub7
+ ; CHECK: S_ENDPGM 0, implicit [[INSERT_SUBREG]]
+ %0:vgpr(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+ %1:vgpr(s128) = COPY $vgpr8_vgpr9_vgpr10_vgpr11
+ %2:vgpr(s256) = G_INSERT %0, %1, 128
+ S_ENDPGM 0, implicit %2
+...
OpenPOWER on IntegriCloud