summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-01 01:06:43 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2019-10-01 01:06:43 +0000
commit77ac40011761399924641d15d51460f64c2be46e (patch)
tree62082306aaef1449b4c748fa92e56ada128e5bf5 /llvm/lib
parent4d536bfbead1494c7977689e32837353531ffc4b (diff)
downloadbcm5719-llvm-77ac40011761399924641d15d51460f64c2be46e.tar.gz
bcm5719-llvm-77ac40011761399924641d15d51460f64c2be46e.zip
AMDGPU/GlobalISel: Legalize G_GLOBAL_VALUE
Handle other cases besides LDS. Mostly a straight port of the existing handling, without the intermediate custom nodes. llvm-svn: 373286
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp106
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h5
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h2
3 files changed, 105 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 02b3b421026..7f71a220f76 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -309,7 +309,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalIf(isPointer(0));
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
- getActionDefinitionsBuilder(G_GLOBAL_VALUE).customFor({LocalPtr});
+ getActionDefinitionsBuilder(G_GLOBAL_VALUE)
+ .customFor({LocalPtr, GlobalPtr, ConstantPtr, Constant32Ptr});
auto &FPOpActions = getActionDefinitionsBuilder(
@@ -1509,6 +1510,62 @@ bool AMDGPULegalizerInfo::legalizeSinCos(
return true;
}
+bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(
+ Register DstReg, LLT PtrTy,
+ MachineIRBuilder &B, const GlobalValue *GV,
+ unsigned Offset, unsigned GAFlags) const {
+ // In order to support pc-relative addressing, SI_PC_ADD_REL_OFFSET is lowered
+ // to the following code sequence:
+ //
+ // For constant address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // a fixup or relocation is emitted to replace $symbol with a literal
+ // constant, which is a pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // For global address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
+ // s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // fixups or relocations are emitted to replace $symbol@*@lo and
+ // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
+ // which is a 64-bit pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // What we want here is an offset from the value returned by s_getpc
+ // (which is the address of the s_add_u32 instruction) to the global
+ // variable, but since the encoding of $symbol starts 4 bytes after the start
+ // of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
+ // small. This requires us to add 4 to the global variable offset in order to
+ // compute the correct address.
+
+ LLT ConstPtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+
+ Register PCReg = PtrTy.getSizeInBits() != 32 ? DstReg :
+ B.getMRI()->createGenericVirtualRegister(ConstPtrTy);
+
+ MachineInstrBuilder MIB = B.buildInstr(AMDGPU::SI_PC_ADD_REL_OFFSET)
+ .addDef(PCReg);
+
+ MIB.addGlobalAddress(GV, Offset + 4, GAFlags);
+ if (GAFlags == SIInstrInfo::MO_NONE)
+ MIB.addImm(0);
+ else
+ MIB.addGlobalAddress(GV, Offset + 4, GAFlags + 1);
+
+ B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
+
+ if (PtrTy.getSizeInBits() == 32)
+ B.buildExtract(DstReg, PCReg, 0);
+ return true;
+ }
+
bool AMDGPULegalizerInfo::legalizeGlobalValue(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1519,10 +1576,9 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
const GlobalValue *GV = MI.getOperand(1).getGlobal();
MachineFunction &MF = B.getMF();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ B.setInstr(MI);
if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
- B.setInstr(MI);
-
if (!MFI->isEntryFunction()) {
const Function &Fn = MF.getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
@@ -1536,13 +1592,47 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
MI.eraseFromParent();
return true;
}
+
+ const Function &Fn = MF.getFunction();
+ DiagnosticInfoUnsupported BadInit(
+ Fn, "unsupported initializer for address space", MI.getDebugLoc());
+ Fn.getContext().diagnose(BadInit);
+ return true;
+ }
+
+ const SITargetLowering *TLI = ST.getTargetLowering();
+
+ if (TLI->shouldEmitFixup(GV)) {
+ buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (TLI->shouldEmitPCReloc(GV)) {
+ buildPCRelGlobalAddress(DstReg, Ty, B, GV, 0, SIInstrInfo::MO_REL32);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ LLT PtrTy = LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64);
+ Register GOTAddr = MRI.createGenericVirtualRegister(PtrTy);
+
+ MachineMemOperand *GOTMMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ 8 /*Size*/, 8 /*Align*/);
+
+ buildPCRelGlobalAddress(GOTAddr, PtrTy, B, GV, 0, SIInstrInfo::MO_GOTPCREL32);
+
+ if (Ty.getSizeInBits() == 32) {
+ // Truncate if this is a 32-bit constant adrdess.
+ auto Load = B.buildLoad(PtrTy, GOTAddr, *GOTMMO);
+ B.buildExtract(DstReg, Load, 0);
} else
- return false;
+ B.buildLoad(DstReg, GOTAddr, *GOTMMO);
- const Function &Fn = MF.getFunction();
- DiagnosticInfoUnsupported BadInit(
- Fn, "unsupported initializer for address space", MI.getDebugLoc());
- Fn.getContext().diagnose(BadInit);
+ MI.eraseFromParent();
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 855444fa276..e2edadc9697 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "AMDGPUArgumentUsageInfo.h"
+#include "SIInstrInfo.h"
namespace llvm {
@@ -58,6 +59,10 @@ public:
bool legalizeSinCos(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool buildPCRelGlobalAddress(
+ Register DstReg, LLT PtrTy, MachineIRBuilder &B, const GlobalValue *GV,
+ unsigned Offset, unsigned GAFlags = SIInstrInfo::MO_NONE) const;
+
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 11a9cffac61..f1ed53091a1 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -186,6 +186,7 @@ private:
unsigned isCFIntrinsic(const SDNode *Intr) const;
+public:
/// \returns True if fixup needs to be emitted for given global value \p GV,
/// false otherwise.
bool shouldEmitFixup(const GlobalValue *GV) const;
@@ -198,6 +199,7 @@ private:
/// global value \p GV, false otherwise.
bool shouldEmitPCReloc(const GlobalValue *GV) const;
+private:
// Analyze a combined offset from an amdgcn_buffer_ intrinsic and store the
// three offsets (voffset, soffset and instoffset) into the SDValue[3] array
// pointed to by Offsets.
OpenPOWER on IntegriCloud