diff options
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-func-align.ll | 18 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-func.ll | 13 |
3 files changed, 34 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index c35a0912adb..7ee4bcb86fe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -184,9 +184,11 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { + const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); // The starting address of all shader programs must be 256 bytes aligned. - MF.setAlignment(8); + // Regular functions just need the basic required instruction alignment. + MF.setAlignment(MFI->isEntryFunction() ? 8 : 2); SetupMachineFunction(MF); diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll b/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll new file mode 100644 index 00000000000..a00f5e2669d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/hsa-func-align.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri < %s | FileCheck -check-prefix=HSA %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -filetype=obj < %s | llvm-readobj -symbols -s -sd | FileCheck -check-prefix=ELF %s + +; ELF: Section { +; ELF: Name: .text +; ELF: SHF_ALLOC (0x2) +; ELF: SHF_EXECINSTR (0x4) +; ELF: AddressAlignment: 32 +; ELF: } + +; HSA: .globl simple_align16 +; HSA: .p2align 5 +define void @simple_align16(i32 addrspace(1)* addrspace(2)* %ptr.out) align 32 { +entry: + %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out + store i32 0, i32 addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll index b4cdd4030d8..147cf9bbe6c 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -14,6 +14,7 @@ ; ELF: Flags [ (0x6) ; ELF: SHF_ALLOC (0x2) ; ELF: SHF_EXECINSTR (0x4) +; ELF: AddressAlignment: 4 ; ELF: } ; ELF: SHT_NOTE @@ -36,6 +37,8 @@ ; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU" ; 
HSA-NOT: .amdgpu_hsa_kernel simple +; HSA: .globl simple +; HSA: .p2align 2 ; HSA: {{^}}simple: ; HSA: .amd_kernel_code_t ; HSA: enable_sgpr_private_segment_buffer = 1 @@ -58,3 +61,13 @@ entry: store i32 0, i32 addrspace(1)* %out ret void } + +; Ignore explicit alignment that is too low. +; HSA: .globl simple_align2 +; HSA: .p2align 2 +define void @simple_align2(i32 addrspace(1)* addrspace(2)* %ptr.out) align 2 { +entry: + %out = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(2)* %ptr.out + store i32 0, i32 addrspace(1)* %out + ret void +} |