summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
diff options
context:
space:
mode:
author: Tim Renouf <tpr.llvm@botech.co.uk> 2019-03-21 12:01:21 +0000
committer: Tim Renouf <tpr.llvm@botech.co.uk> 2019-03-21 12:01:21 +0000
commit: 361b5b2193421824925a72669f1d06cd63c3d9a7 (patch)
tree: 76cf94d30c3a4d9caf4150a96b93e988ac360445 /llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
parent: 92cbcfc325e08c07d5b0d5157f95ec0c90124e70 (diff)
download: bcm5719-llvm-361b5b2193421824925a72669f1d06cd63c3d9a7.tar.gz
bcm5719-llvm-361b5b2193421824925a72669f1d06cd63c3d9a7.zip
[AMDGPU] Support for v3i32/v3f32
Added support for dwordx3 for most load/store types, but not DS, and not intrinsics yet.

SI (gfx6) does not have dwordx3 instructions, so they are not enabled there.

Some of this patch is from Matt Arsenault, also of AMD.

Differential Revision: https://reviews.llvm.org/D58902
Change-Id: I913ef54f1433a7149da8d72f4af54dbb13436bd9
llvm-svn: 356659
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIInstrInfo.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 25
1 files changed, 17 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 878b456e1b5..8a35ef01457 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -562,7 +562,8 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned EltSize = 4;
unsigned Opcode = AMDGPU::V_MOV_B32_e32;
if (RI.isSGPRClass(RC)) {
- if (RI.getRegSizeInBits(*RC) > 32) {
+ // TODO: Copy vec3/vec5 with s_mov_b64s then final s_mov_b32.
+ if (!(RI.getRegSizeInBits(*RC) % 64)) {
Opcode = AMDGPU::S_MOV_B64;
EltSize = 8;
} else {
@@ -840,6 +841,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S32_SAVE;
case 8:
return AMDGPU::SI_SPILL_S64_SAVE;
+ case 12:
+ return AMDGPU::SI_SPILL_S96_SAVE;
case 16:
return AMDGPU::SI_SPILL_S128_SAVE;
case 32:
@@ -942,6 +945,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S32_RESTORE;
case 8:
return AMDGPU::SI_SPILL_S64_RESTORE;
+ case 12:
+ return AMDGPU::SI_SPILL_S96_RESTORE;
case 16:
return AMDGPU::SI_SPILL_S128_RESTORE;
case 32:
@@ -1916,14 +1921,18 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
const int16_t *SubIndices = Sub0_15;
int NElts = DstSize / 32;
- // 64-bit select is only avaialble for SALU.
+ // 64-bit select is only available for SALU.
+ // TODO: Split 96-bit into 64-bit and 32-bit, not 3x 32-bit.
if (Pred == SCC_TRUE) {
- SelOp = AMDGPU::S_CSELECT_B64;
- EltRC = &AMDGPU::SGPR_64RegClass;
- SubIndices = Sub0_15_64;
-
- assert(NElts % 2 == 0);
- NElts /= 2;
+ if (NElts % 2) {
+ SelOp = AMDGPU::S_CSELECT_B32;
+ EltRC = &AMDGPU::SGPR_32RegClass;
+ } else {
+ SelOp = AMDGPU::S_CSELECT_B64;
+ EltRC = &AMDGPU::SGPR_64RegClass;
+ SubIndices = Sub0_15_64;
+ NElts /= 2;
+ }
}
MachineInstrBuilder MIB = BuildMI(
OpenPOWER on IntegriCloud