summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-24 17:54:12 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com> 2019-06-24 17:54:12 +0000
commit: 8fcd5ade3e5e9ca79180d72de47c99903511cc15 (patch)
tree: 1b82174cc698695306171546e466df049efe7c77
parent: 6e04b92c896ca37f0fa822de130400f46e9fc908 (diff)
download: bcm5719-llvm-8fcd5ade3e5e9ca79180d72de47c99903511cc15.tar.gz
download: bcm5719-llvm-8fcd5ade3e5e9ca79180d72de47c99903511cc15.zip
AMDGPU/GlobalISel: Split VALU s64 G_ZEXT/G_SEXT in RegBankSelect
Scalar extends to s64 can use S_BFE_{I64|U64}, but vector extends need to extend to the 32-bit low half first, and then to 64 bits. I'm not sure where the line should be between what RegBankSelect handles and what instruction selection does, but for now I'm erring on the side of RegBankSelect for the benefit of future post-RBS combines. llvm-svn: 364212
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp 70
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir 51
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir 48
3 files changed, 144 insertions, 25 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index c8c40f05dce..7cfd580e81e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -830,19 +830,53 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case AMDGPU::G_ZEXT: {
Register SrcReg = MI.getOperand(1).getReg();
LLT SrcTy = MRI.getType(SrcReg);
- if (SrcTy != LLT::scalar(1))
- return;
+ bool Signed = Opc == AMDGPU::G_SEXT;
MachineIRBuilder B(MI);
- bool Signed = Opc == AMDGPU::G_SEXT;
+ const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
+
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
- const RegisterBank *SrcBank = getRegBank(SrcReg, MRI, *TRI);
- if (SrcBank->getID() == AMDGPU::SCCRegBankID ||
- SrcBank->getID() == AMDGPU::VCCRegBankID) {
- const RegisterBank *DstBank = getRegBank(DstReg, MRI, *TRI);
- unsigned DstSize = DstTy.getSizeInBits();
+ if (DstTy.isScalar() &&
+ SrcBank != &AMDGPU::SGPRRegBank &&
+ SrcBank != &AMDGPU::SCCRegBank &&
+ SrcBank != &AMDGPU::VCCRegBank &&
+ // FIXME: Should handle any type that rounds to s64 when irregular
+ // breakdowns are supported.
+ DstTy.getSizeInBits() == 64 &&
+ SrcTy.getSizeInBits() <= 32) {
+ const LLT S32 = LLT::scalar(32);
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+ // Extend to 32 bits for the low half, and then fill in the high half.
+ if (Signed) {
+ // TODO: Should really be buildSExtOrCopy
+ B.buildSExtOrTrunc(DefRegs[0], SrcReg);
+
+ // Replicate sign bit from 32-bit extended part.
+ auto ShiftAmt = B.buildConstant(S32, 31);
+ MRI.setRegBank(ShiftAmt.getReg(0), *SrcBank);
+ B.buildAShr(DefRegs[1], DefRegs[0], ShiftAmt);
+ } else {
+ B.buildZExtOrTrunc(DefRegs[0], SrcReg);
+ B.buildConstant(DefRegs[1], 0);
+ }
+ MRI.setRegBank(DstReg, *SrcBank);
+ MI.eraseFromParent();
+ return;
+ }
+
+ if (SrcTy != LLT::scalar(1))
+ return;
+
+ if (SrcBank == &AMDGPU::SCCRegBank || SrcBank == &AMDGPU::VCCRegBank) {
+ SmallVector<Register, 2> DefRegs(OpdMapper.getVRegs(0));
+
+ const RegisterBank *DstBank = SrcBank == &AMDGPU::SCCRegBank ?
+ &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
+
+ unsigned DstSize = DstTy.getSizeInBits();
// 64-bit select is SGPR only
const bool UseSel64 = DstSize > 32 &&
SrcBank->getID() == AMDGPU::SCCRegBankID;
@@ -854,10 +888,11 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
MRI.setRegBank(True.getReg(0), *DstBank);
MRI.setRegBank(False.getReg(0), *DstBank);
+ MRI.setRegBank(DstReg, *DstBank);
+
if (DstSize > 32 && SrcBank->getID() != AMDGPU::SCCRegBankID) {
- auto Sel = B.buildSelect(SelType, SrcReg, True, False);
- MRI.setRegBank(Sel.getReg(0), *DstBank);
- B.buildMerge(DstReg, { Sel.getReg(0), Sel.getReg(0) });
+ B.buildSelect(DefRegs[0], SrcReg, True, False);
+ B.buildCopy(DefRegs[1], DefRegs[0]);
} else if (DstSize < 32) {
auto Sel = B.buildSelect(SelType, SrcReg, True, False);
MRI.setRegBank(Sel.getReg(0), *DstBank);
@@ -1313,8 +1348,17 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
- OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
- OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+ // TODO: Should anyext be split into 32-bit part as well?
+ if (MI.getOpcode() == AMDGPU::G_ANYEXT) {
+ OpdsMapping[0] = AMDGPU::getValueMapping(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMapping(SrcBank->getID(), SrcSize);
+ } else {
+ // Scalar extend can use 64-bit BFE, but VGPRs require extending to
+ // 32-bits, and then to 64.
+ OpdsMapping[0] = AMDGPU::getValueMappingSGPR64Only(DstBank, DstSize);
+ OpdsMapping[1] = AMDGPU::getValueMappingSGPR64Only(SrcBank->getID(),
+ SrcSize);
+ }
break;
}
case AMDGPU::G_FCMP: {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
index 2e1179325de..8117db8e943 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir
@@ -17,6 +17,22 @@ body: |
...
---
+name: sext_s16_to_s64_s
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; CHECK-LABEL: name: sext_s16_to_s64_s
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:sgpr(s64) = G_SEXT [[TRUNC]](s16)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_SEXT %1
+...
+
+---
name: sext_s32_to_s64_v
legalized: true
@@ -25,7 +41,10 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: sext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s64) = G_SEXT [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[COPY1]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_SEXT %0
...
@@ -146,7 +165,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 -1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +278,30 @@ body: |
; CHECK-LABEL: name: sext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s64) = G_ASHR [[SHL]], [[C]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_SEXT %1
...
+
+---
+name: sext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: sext_s16_to_s64_vgpr
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[SEXT:%[0-9]+]]:vgpr(s32) = G_SEXT [[TRUNC]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 31
+ ; CHECK: [[ASHR:%[0-9]+]]:vgpr(s32) = G_ASHR [[SEXT]], [[C]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SEXT]](s32), [[ASHR]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_SEXT %1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
index bea06e86d2c..207a2e53668 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir
@@ -17,6 +17,22 @@ body: |
...
---
+name: zext_s16_to_s64_s
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $sgpr0
+ ; CHECK-LABEL: name: zext_s16_to_s64_s
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:sgpr(s64) = G_ZEXT [[TRUNC]](s16)
+ %0:_(s32) = COPY $sgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_ZEXT %1
+...
+
+---
name: zext_s32_to_s64_v
legalized: true
@@ -25,7 +41,9 @@ body: |
liveins: $vgpr0_vgpr1
; CHECK-LABEL: name: zext_s32_to_s64_v
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
- ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s64) = G_ZEXT %0
...
@@ -146,7 +164,8 @@ body: |
; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1
; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[C]], [[C1]]
- ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT]](s32)
+ ; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[SELECT]](s32)
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[SELECT]](s32), [[COPY2]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s1) = G_ICMP intpred(eq), %0, %1
@@ -258,11 +277,28 @@ body: |
; CHECK-LABEL: name: zext_s1_to_s64_vgpr
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s1) = G_TRUNC [[COPY]](s32)
- ; CHECK: [[ANYEXT:%[0-9]+]]:vgpr(s64) = G_ANYEXT [[TRUNC]](s1)
- ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 63
- ; CHECK: [[SHL:%[0-9]+]]:vgpr(s64) = G_SHL [[ANYEXT]], [[C]](s32)
- ; CHECK: [[LSHR:%[0-9]+]]:vgpr(s64) = G_LSHR [[SHL]], [[C]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s1)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s1) = G_TRUNC %0
%2:_(s64) = G_ZEXT %1
...
+
+---
+name: zext_s16_to_s64_vgpr
+legalized: true
+
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: zext_s16_to_s64_vgpr
+ ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[ZEXT:%[0-9]+]]:vgpr(s32) = G_ZEXT [[TRUNC]](s16)
+ ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+ ; CHECK: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[ZEXT]](s32), [[C]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s16) = G_TRUNC %0
+ %2:_(s64) = G_ZEXT %1
+...
OpenPOWER on IntegriCloud