summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTom Stellard <thomas.stellard@amd.com>2016-07-13 14:23:33 +0000
committerTom Stellard <thomas.stellard@amd.com>2016-07-13 14:23:33 +0000
commit418beb7671d6cbf5b91241a6653e077815cca9f3 (patch)
tree671385700aec05feb79645c5e8fe85fb7abb372d
parent533a893fa126d8a9662d836ad28f51afa1d2e74a (diff)
downloadbcm5719-llvm-418beb7671d6cbf5b91241a6653e077815cca9f3.tar.gz
bcm5719-llvm-418beb7671d6cbf5b91241a6653e077815cca9f3.zip
AMDGPU/SI: Add support for R_AMDGPU_GOTPCREL
Reviewers: rafael, ruiu, tony-tye, arsenm, kzhuravl Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: http://reviews.llvm.org/D21484 llvm-svn: 275268
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp6
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp73
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h6
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-variable-relocs.ll203
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-zero-initializer.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-func.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa.ll4
10 files changed, 277 insertions, 51 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c7784d7a76c..64d494b70d8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -106,7 +106,7 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->EmitDirectiveHSACodeObjectVersion(2, 0);
+ TS->EmitDirectiveHSACodeObjectVersion(2, 1);
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index e1e6f2eb3a3..ad8d3e4d354 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -39,6 +39,13 @@ using namespace llvm;
AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
Ctx(ctx), ST(st) { }
+static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
+ switch (MOFlags) {
+ default: return MCSymbolRefExpr::VK_None;
+ case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
+ }
+}
+
void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
int MCOpcode = ST.getInstrInfo()->pseudoToMCOpcode(MI->getOpcode());
@@ -69,7 +76,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_GlobalAddress: {
const GlobalValue *GV = MO.getGlobal();
MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- const MCExpr *SymExpr = MCSymbolRefExpr::create(Sym, Ctx);
+ const MCExpr *SymExpr =
+ MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
MCOp = MCOperand::createExpr(Expr);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 925f879403f..3ba1e6e1a4c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -134,9 +134,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
- if (!RM.hasValue())
- return Reloc::PIC_;
- return *RM;
+ // The AMDGPU toolchain only supports generating shared objects, so we
+ // must always use PIC.
+ return Reloc::PIC_;
}
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 7254dac02a7..1899ecb07b8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -510,12 +510,14 @@ bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const Value *Ptr = MemNode->getMemOperand()->getValue();
// UndefValue means this is a load of a kernel input. These are uniform.
- // Sometimes LDS instructions have constant pointers
- if (isa<UndefValue>(Ptr) || isa<Argument>(Ptr) || isa<Constant>(Ptr) ||
- isa<GlobalValue>(Ptr))
+ // Sometimes LDS instructions have constant pointers.
+ // If Ptr is null, then that means this mem operand contains a
+ // PseudoSourceValue like GOT.
+ if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
+ isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
return true;
- const Instruction *I = dyn_cast_or_null<Instruction>(Ptr);
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
return I && I->getMetadata("amdgpu.uniform");
}
@@ -1517,27 +1519,22 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
+static bool shouldEmitGOTReloc(const GlobalValue *GV,
+ const TargetMachine &TM) {
+ return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
+}
+
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
- if (GA->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
- return false;
-
- return TargetLowering::isOffsetFoldingLegal(GA);
+ // We can fold offsets for anything that doesn't require a GOT relocation.
+ return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
}
-SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
- SDValue Op,
- SelectionDAG &DAG) const {
- GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
-
- if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
- GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
- return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
-
- SDLoc DL(GSD);
- const GlobalValue *GV = GSD->getGlobal();
- EVT PtrVT = Op.getValueType();
-
+static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
+ SDLoc DL, unsigned Offset, EVT PtrVT,
+ unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
// s_getpc_b64 s[0:1]
@@ -1555,11 +1552,41 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32,
- GSD->getOffset() + 4);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+ GAFlags);
return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
}
+SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
+ SDValue Op,
+ SelectionDAG &DAG) const {
+ GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
+
+ if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS &&
+ GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS)
+ return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
+
+ SDLoc DL(GSD);
+ const GlobalValue *GV = GSD->getGlobal();
+ EVT PtrVT = Op.getValueType();
+
+ if (!shouldEmitGOTReloc(GV, getTargetMachine()))
+ return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+
+ SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
+ SIInstrInfo::MO_GOTPCREL);
+
+ Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
+ PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
+ const DataLayout &DataLayout = DAG.getDataLayout();
+ unsigned Align = DataLayout.getABITypeAlignment(PtrTy);
+ // FIXME: Use a PseudoSourceValue once those can be assigned an address space.
+ MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
+
+ return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr,
+ PtrInfo, false, false, true, Align);
+}
+
SDValue SITargetLowering::lowerTRAP(SDValue Op,
SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 59e9325fd7c..8b4d6131acb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -91,6 +91,12 @@ protected:
unsigned OpIdx1) const override;
public:
+
+ enum TargetOperandFlags {
+ MO_NONE = 0,
+ MO_GOTPCREL = 1
+ };
+
explicit SIInstrInfo(const SISubtarget &);
const SIRegisterInfo &getRegisterInfo() const {
diff --git a/llvm/test/CodeGen/AMDGPU/global-variable-relocs.ll b/llvm/test/CodeGen/AMDGPU/global-variable-relocs.ll
new file mode 100644
index 00000000000..c39394a3527
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/global-variable-relocs.ll
@@ -0,0 +1,203 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji < %s | FileCheck %s
+
+@private = private addrspace(1) global [256 x i32] zeroinitializer
+@internal = internal addrspace(1) global [256 x i32] zeroinitializer
+@available_externally = available_externally addrspace(1) global [256 x i32] zeroinitializer
+@linkonce = linkonce addrspace(1) global [256 x i32] zeroinitializer
+@weak= weak addrspace(1) global [256 x i32] zeroinitializer
+@common = common addrspace(1) global [256 x i32] zeroinitializer
+@extern_weak = extern_weak addrspace(1) global [256 x i32]
+@linkonce_odr = linkonce_odr addrspace(1) global [256 x i32] zeroinitializer
+@weak_odr = weak_odr addrspace(1) global [256 x i32] zeroinitializer
+@external = external addrspace(1) global [256 x i32]
+@external_w_init = addrspace(1) global [256 x i32] zeroinitializer
+
+; CHECK-LABEL: private_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], private+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @private_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @private, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: internal_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], internal+8
+; CHECK: s_addc_u32 s[[ADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[ADDR_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[ADDR_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @internal_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @internal, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: available_externally_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], available_externally@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @available_externally_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @available_externally, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: linkonce_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: common_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], common@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @common_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @common, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: extern_weak_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], extern_weak@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @extern_weak_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @extern_weak, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: linkonce_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], linkonce_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @linkonce_odr_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @linkonce_odr, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: weak_odr_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], weak_odr@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @weak_odr_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @weak_odr, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: external_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: external_w_init_test:
+; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
+; CHECK: s_add_u32 s[[GOTADDR_LO:[0-9]+]], s[[PC_LO]], external_w_init@GOTPCREL+4
+; CHECK: s_addc_u32 s[[GOTADDR_HI:[0-9]+]], s[[PC_HI]], 0
+; CHECK: s_load_dwordx2 s{{\[}}[[ADDR_LO:[0-9]+]]:[[ADDR_HI:[0-9]+]]{{\]}}, s{{\[}}[[GOTADDR_LO]]:[[GOTADDR_HI]]{{\]}}, 0x0
+; CHECK: s_add_u32 s[[GEP_LO:[0-9]+]], s[[ADDR_LO]], 4
+; CHECK: s_addc_u32 s[[GEP_HI:[0-9]+]], s[[ADDR_HI]], 0
+; CHECK-DAG: v_mov_b32_e32 v[[V_LO:[0-9]+]], s[[GEP_LO]]
+; CHECK-DAG: v_mov_b32_e32 v[[V_HI:[0-9]+]], s[[GEP_HI]]
+; CHECK: flat_load_dword v{{[0-9]+}}, v{{\[}}[[V_LO]]:[[V_HI]]{{\]}}
+define void @external_w_init_test(i32 addrspace(1)* %out) {
+ %ptr = getelementptr [256 x i32], [256 x i32] addrspace(1)* @external_w_init, i32 0, i32 1
+ %val = load i32, i32 addrspace(1)* %ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK: .local private
+; CHECK: .local internal
+; CHECK: .weak linkonce
+; CHECK: .weak weak
+; CHECK: .weak linkonce_odr
+; CHECK: .weak weak_odr
+; CHECK-NOT: external{{$}}
+; CHECK: .globl external_w_init
diff --git a/llvm/test/CodeGen/AMDGPU/global-zero-initializer.ll b/llvm/test/CodeGen/AMDGPU/global-zero-initializer.ll
deleted file mode 100644
index 59c3960c46e..00000000000
--- a/llvm/test/CodeGen/AMDGPU/global-zero-initializer.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=SI < %s 2>&1 | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga < %s 2>&1 | FileCheck %s
-
-; CHECK: {{^}}load_init_global_global:
-; CHECK: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
-; CHECK: s_add_u32 s[[ADDR_LO:[0-9]+]], s[[PC_LO]], global+4
-; CHECK: s_addc_u32 s5, s[[PC_HI]], 0
-; CHECK: buffer_load_dword v{{[0-9]+}}, off, s{{\[}}[[ADDR_LO]]:7], 0 offset:40
-; CHECK: global:
-; CHECK: .zero 1024
-@global = addrspace(1) global [256 x i32] zeroinitializer
-
-define void @load_init_global_global(i32 addrspace(1)* %out, i1 %p) {
- %gep = getelementptr [256 x i32], [256 x i32] addrspace(1)* @global, i32 0, i32 10
- %ld = load i32, i32 addrspace(1)* %gep
- store i32 %ld, i32 addrspace(1)* %out
- ret void
-}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll
index 4aa003265ba..28c8b5d73b0 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll
@@ -18,7 +18,7 @@
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
@@ -30,7 +30,7 @@
; ELF: Type: Function (0x2)
; ELF: }
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
index 67d82e2731e..1b4a0f3090b 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji | FileCheck --check-prefix=HSA --check-prefix=HSA-FIJI %s
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
; HSA-FIJI: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll
index 1f07242d29a..82d7da188ca 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa.ll
@@ -18,7 +18,7 @@
; ELF: SHT_NOTE
; ELF: 0000: 04000000 08000000 01000000 414D4400
-; ELF: 0010: 02000000 00000000 04000000 1B000000
+; ELF: 0010: 02000000 01000000 04000000 1B000000
; ELF: 0020: 03000000 414D4400 04000700 07000000
; ELF: 0030: 00000000 00000000 414D4400 414D4447
; ELF: 0040: 50550000
@@ -29,7 +29,7 @@
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: }
-; HSA: .hsa_code_object_version 2,0
+; HSA: .hsa_code_object_version 2,1
; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
OpenPOWER on IntegriCloud