diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.h | 13 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global-constant.ll | 55 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-globals.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll | 12 |
8 files changed, 94 insertions, 45 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index 739c609eeb1..1dbb8a3a77f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -9,10 +9,10 @@ #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" -#include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/Support/ELF.h" +#include "Utils/AMDGPUBaseInfo.h" using namespace llvm; @@ -22,7 +22,8 @@ using namespace llvm; MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal( const GlobalValue *GV, SectionKind Kind, const TargetMachine &TM) const { - if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV)) + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV) && + AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple())) return TextSection; return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, TM); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ff6096ca4f0..2e9d90e30b7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1813,6 +1813,23 @@ void SITargetLowering::createDebuggerPrologueStackObjects( } } +bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { + const Triple &TT = getTargetMachine().getTargetTriple(); + return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + AMDGPU::shouldEmitConstantsToTextSection(TT); +} + +bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { + return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + !shouldEmitFixup(GV) && + !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); +} + +bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const { + return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV); +} + /// This transforms the control flow intrinsics to get the branch destination as /// last parameter, also switches branch target with BR if the need arise SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, @@ -1997,29 +2014,12 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, return DAG.getUNDEF(ASC->getValueType(0)); } -static bool shouldEmitFixup(const GlobalValue *GV, - const TargetMachine &TM) { - // FIXME: We need to emit global variables in constant address space in a - // separate section, and use relocations. - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; -} - -static bool shouldEmitGOTReloc(const GlobalValue *GV, - const TargetMachine &TM) { - return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && - !TM.shouldAssumeDSOLocal(*GV->getParent(), GV); -} - -static bool shouldEmitPCReloc(const GlobalValue *GV, - const TargetMachine &TM) { - return !shouldEmitFixup(GV, TM) && !shouldEmitGOTReloc(GV, TM); -} - bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. - return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && - !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine()); + return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + !shouldEmitGOTReloc(GA->getGlobal()); } static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV, @@ -2076,9 +2076,9 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, const GlobalValue *GV = GSD->getGlobal(); EVT PtrVT = Op.getValueType(); - if (shouldEmitFixup(GV, getTargetMachine())) + if (shouldEmitFixup(GV)) return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT); - else if (shouldEmitPCReloc(GV, getTargetMachine())) + else if (shouldEmitPCReloc(GV)) return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT, SIInstrInfo::MO_REL32); diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index b65f95f7854..05b98c9f903 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -78,6 +78,19 @@ class SITargetLowering final : public AMDGPUTargetLowering { bool isCFIntrinsic(const SDNode *Intr) const; void createDebuggerPrologueStackObjects(MachineFunction &MF) const; + + /// \returns True if fixup needs to be emitted for given global value \p GV, + /// false otherwise. + bool shouldEmitFixup(const GlobalValue *GV) const; + + /// \returns True if GOT relocation needs to be emitted for given global value + /// \p GV, false otherwise. + bool shouldEmitGOTReloc(const GlobalValue *GV) const; + + /// \returns True if PC-relative relocation needs to be emitted for given + /// global value \p GV, false otherwise. + bool shouldEmitPCReloc(const GlobalValue *GV) const; + public: SITargetLowering(const TargetMachine &tm, const SISubtarget &STI); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 64721f686a2..fdb3a5edf01 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -164,6 +164,10 @@ bool isReadOnlySegment(const GlobalValue *GV) { return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; } +bool shouldEmitConstantsToTextSection(const Triple &TT) { + return TT.getOS() != Triple::AMDHSA; +} + int getIntegerAttribute(const Function &F, StringRef Name, int Default) { Attribute A = F.getFnAttribute(Name); int Result = Default; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 97c0738a99b..28e480b6f2a 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -55,6 +55,10 @@ bool isGroupSegment(const GlobalValue *GV); bool isGlobalSegment(const GlobalValue *GV); bool isReadOnlySegment(const GlobalValue *GV); +/// \returns True if constants should be emitted to .text section for given +/// target triple \p TT, false otherwise. +bool shouldEmitConstantsToTextSection(const Triple &TT); + /// \returns Integer value requested using \p F's \p Name attribute. /// /// \returns \p Default if attribute is not present. diff --git a/llvm/test/CodeGen/AMDGPU/global-constant.ll b/llvm/test/CodeGen/AMDGPU/global-constant.ll index 0f2fc836a24..5a18d425d50 100644 --- a/llvm/test/CodeGen/AMDGPU/global-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/global-constant.ll @@ -1,27 +1,54 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOHSA %s ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=HSA %s -@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] -@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] +@private1 = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] +@private2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] +@available_externally = available_externally addrspace(2) global [256 x i32] zeroinitializer -; GCN-LABEL: {{^}}main: +; GCN-LABEL: {{^}}private_test: ; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly -; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 + +; Non-HSA OSes use fixup into .text section. +; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1 +; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 + +; HSA OSes use relocations. +; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], private1@rel32@lo+4 +; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], private1@rel32@hi+4 + ; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} -; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly -; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 -; NOHSA: .text -; HSA: .text -; GCN: readonly: -; GCN: readonly2: -define void @main(i32 %index, float addrspace(1)* %out) { - %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index + +; Non-HSA OSes use fixup into .text section. +; NOHSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2 +; NOHSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 + +; HSA OSes use relocations. +; HSA: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], private2@rel32@lo+4 +; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], private2@rel32@hi+4 + +define void @private_test(i32 %index, float addrspace(1)* %out) { + %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @private1, i32 0, i32 %index %val = load float, float addrspace(2)* %ptr store float %val, float addrspace(1)* %out - %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index + %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @private2, i32 0, i32 %index %val2 = load float, float addrspace(2)* %ptr2 store float %val2, float addrspace(1)* %out ret void } +; HSA-LABEL: {{^}}available_externally_test: +; HSA: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; HSA: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], available_externally@gotpcrel32@lo+4 +; HSA: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], available_externally@gotpcrel32@hi+4 +define void @available_externally_test(i32 addrspace(1)* %out) { + %ptr = getelementptr [256 x i32], [256 x i32] addrspace(2)* @available_externally, i32 0, i32 1 + %val = load i32, i32 addrspace(2)* %ptr + store i32 %val, i32 addrspace(1)* %out + ret void +} + +; NOHSA: .text +; HSA: .section .rodata + +; GCN: private1: +; GCN: private2: diff --git a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll index df478fbcf3b..2820b308edb 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-globals.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-globals.ll @@ -38,7 +38,7 @@ define void @test() { ; ASM: .size external_global_program, 4 ; ASM: .type internal_readonly,@object -; ASM: .text +; ASM: .section .rodata.cst4,"aM",@progbits,4 ; ASM: internal_readonly: ; ASM: .long 0 ; ASM: .size internal_readonly, 4 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll index fdaca922c4f..ccb383e0d02 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.memcpy.ll @@ -333,13 +333,13 @@ define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* no ; FUNC-LABEL: {{^}}test_memcpy_const_string_align4: ; SI: s_getpc_b64 -; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4+4 +; SI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, hello.align4+20 ; SI: s_addc_u32 -; SI: s_load_dwordx4 -; SI: s_load_dwordx4 -; SI: s_load_dwordx2 -; SI: buffer_store_dwordx4 -; SI: buffer_store_dwordx4 +; SI-DAG: s_load_dwordx4 +; SI-DAG: s_load_dwordx4 +; SI-DAG: s_load_dwordx2 +; SI-DAG: buffer_store_dwordx4 +; SI-DAG: buffer_store_dwordx4 define void @test_memcpy_const_string_align4(i8 addrspace(1)* noalias %out) nounwind { %str = bitcast [16 x i8] addrspace(2)* @hello.align4 to i8 addrspace(2)* call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* %out, i8 addrspace(2)* %str, i64 32, i32 4, i1 false) |