summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 19:07:19 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-07 19:07:19 +0000
commit0b2ea91d6d162c3d5af824729ff3f925d163f8ac (patch)
tree077f02c42224099220bf3ac0fdd5bc2a1a76523f
parent578fa2819fa64b60dfec66a3b970634c71a39841 (diff)
downloadbcm5719-llvm-0b2ea91d6d162c3d5af824729ff3f925d163f8ac.tar.gz
bcm5719-llvm-0b2ea91d6d162c3d5af824729ff3f925d163f8ac.zip
AMDGPU/GlobalISel: Use S_MOV_B64 for inline constants
This hides some defects in SIFoldOperands when the immediates are split.

llvm-svn: 373943
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 47
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir 19
-rw-r--r-- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir 4
3 files changed, 39 insertions, 31 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 28ebbd9101c..aa165d4ce21 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1472,31 +1472,38 @@ bool AMDGPUInstructionSelector::selectG_CONSTANT(MachineInstr &I) const {
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- DebugLoc DL = I.getDebugLoc();
- const TargetRegisterClass *RC = IsSgpr ? &AMDGPU::SReg_32_XM0RegClass :
- &AMDGPU::VGPR_32RegClass;
- Register LoReg = MRI->createVirtualRegister(RC);
- Register HiReg = MRI->createVirtualRegister(RC);
- const APInt &Imm = APInt(Size, I.getOperand(1).getImm());
-
- BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
- .addImm(Imm.trunc(32).getZExtValue());
-
- BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
- .addImm(Imm.ashr(32).getZExtValue());
-
- const MachineInstr *RS =
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
- .addReg(LoReg)
- .addImm(AMDGPU::sub0)
- .addReg(HiReg)
- .addImm(AMDGPU::sub1);
+ const DebugLoc &DL = I.getDebugLoc();
+
+ APInt Imm(Size, I.getOperand(1).getImm());
+
+ MachineInstr *ResInst;
+ if (IsSgpr && TII.isInlineConstant(Imm)) {
+ ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B64), DstReg)
+ .addImm(I.getOperand(1).getImm());
+ } else {
+ const TargetRegisterClass *RC = IsSgpr ?
+ &AMDGPU::SReg_32_XM0RegClass : &AMDGPU::VGPR_32RegClass;
+ Register LoReg = MRI->createVirtualRegister(RC);
+ Register HiReg = MRI->createVirtualRegister(RC);
+
+ BuildMI(*BB, &I, DL, TII.get(Opcode), LoReg)
+ .addImm(Imm.trunc(32).getZExtValue());
+
+ BuildMI(*BB, &I, DL, TII.get(Opcode), HiReg)
+ .addImm(Imm.ashr(32).getZExtValue());
+
+ ResInst = BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(LoReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ }
// We can't call constrainSelectedInstRegOperands here, because it doesn't
// work for target independent opcodes
I.eraseFromParent();
const TargetRegisterClass *DstRC =
- TRI.getConstrainedRegClassForOperand(RS->getOperand(0), *MRI);
+ TRI.getConstrainedRegClassForOperand(ResInst->getOperand(0), *MRI);
if (!DstRC)
return true;
return RBI.constrainGenericRegister(DstReg, *DstRC, *MRI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
index b97f9d384aa..2acf1aeb5a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-constant.mir
@@ -5,6 +5,7 @@
name: constant
legalized: true
regBankSelected: true
+tracksRegLiveness: true
body: |
@@ -25,28 +26,30 @@ body: |
; GCN: %{{[0-9]+}}:sreg_32 = S_MOV_B32 1065353216
%4:sgpr(s32) = G_FCONSTANT float 1.0
+ ; GCN: %5:sreg_64_xexec = S_MOV_B64 4607182418800017408
+ %5:sgpr(s64) = G_FCONSTANT double 1.0
+
; GCN: [[LO1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 0
- ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1072693248
+ ; GCN: [[HI1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1076101120
; GCN: %{{[0-9]+}}:sreg_64_xexec = REG_SEQUENCE [[LO1]], %subreg.sub0, [[HI1]], %subreg.sub1
- %5:sgpr(s64) = G_FCONSTANT double 1.0
+ %6:sgpr(s64) = G_FCONSTANT double 10.0
; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1
- %6:vgpr(s32) = G_CONSTANT i32 1
+ %7:vgpr(s32) = G_CONSTANT i32 1
; GCN: [[LO2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0
; GCN: [[HI2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1
; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO2]], %subreg.sub0, [[HI2]], %subreg.sub1
- %7:vgpr(s64) = G_CONSTANT i64 4294967296
+ %8:vgpr(s64) = G_CONSTANT i64 4294967296
; GCN: %{{[0-9]+}}:vgpr_32 = V_MOV_B32_e32 1065353216
- %8:vgpr(s32) = G_FCONSTANT float 1.0
+ %9:vgpr(s32) = G_FCONSTANT float 1.0
; GCN: [[LO3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0
; GCN: [[HI3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1072693248
; GCN: %{{[0-9]+}}:vreg_64 = REG_SEQUENCE [[LO3]], %subreg.sub0, [[HI3]], %subreg.sub1
- %9:vgpr(s64) = G_FCONSTANT double 1.0
+ %10:vgpr(s64) = G_FCONSTANT double 1.0
- S_ENDPGM 0, implicit %2, implicit %4, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9
+ S_ENDPGM 0, implicit %2, implicit %4, implicit %5, implicit %6, implicit %8, implicit %3, implicit %5, implicit %7, implicit %9, implicit %10
...
-
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
index 367c92b5243..30cb3f032d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-smrd.mir
@@ -190,9 +190,7 @@ body: |
# Test a load of an offset from a constant base address
# GCN-LABEL: name: constant_address_positive{{$}}
-# GCN: %4:sreg_32_xm0 = S_MOV_B32 44
-# GCN: %5:sreg_32_xm0 = S_MOV_B32 0
-# GCN: %0:sreg_64 = REG_SEQUENCE %4, %subreg.sub0, %5, %subreg.sub1
+# GCN: %0:sreg_64 = S_MOV_B64 44
# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
OpenPOWER on IntegriCloud