summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8
-rw-r--r-- llvm/test/CodeGen/AMDGPU/s_code_end.ll | 94
2 files changed, 55 insertions, 47 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 03c6a671338..5578251bcfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -309,7 +309,13 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
CallGraphResourceInfo.clear();
- if (AMDGPU::isGFX10(*getGlobalSTI())) {
+ // Pad with s_code_end to help tools and guard against instruction prefetch
+ // causing stale data in caches. Arguably this should be done by the linker,
+ // which is why this isn't done for Mesa.
+ const MCSubtargetInfo &STI = *getGlobalSTI();
+ if (AMDGPU::isGFX10(STI) &&
+ (STI.getTargetTriple().getOS() == Triple::AMDHSA ||
+ STI.getTargetTriple().getOS() == Triple::AMDPAL)) {
OutStreamer->SwitchSection(getObjFileLowering().getTextSection());
getTargetStreamer()->EmitCodeEnd();
}
diff --git a/llvm/test/CodeGen/AMDGPU/s_code_end.ll b/llvm/test/CodeGen/AMDGPU/s_code_end.ll
index 80f566ba926..2e87098a42f 100644
--- a/llvm/test/CodeGen/AMDGPU/s_code_end.ll
+++ b/llvm/test/CodeGen/AMDGPU/s_code_end.ll
@@ -1,11 +1,13 @@
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10,GFX10-ASM %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10,GFX10-OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10END,GFX10END-OBJ %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10END,GFX10END-ASM %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 < %s | FileCheck -check-prefixes=GCN,GCN-ASM,GFX10NOEND,GFX10NOEND-ASM %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 -filetype=obj < %s | llvm-objdump -arch=amdgcn -mcpu=gfx1010 -disassemble - | FileCheck -check-prefixes=GCN,GCN-OBJ,GFX10NOEND,GFX10NOEND-OBJ %s
; GCN: a_kernel1:
-; GCN-NEXT: s_endpgm
-; GCN-ASM-NEXT: [[END_LABEL1:\.Lfunc_end.*]]:
+; GCN: s_endpgm
+; GCN-ASM: [[END_LABEL1:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_kernel1, [[END_LABEL1]]-a_kernel1
-; GCN-ASM: .section .AMDGPU.config
; GCN-OBJ-NEXT: s_nop 0
@@ -14,19 +16,17 @@ define amdgpu_kernel void @a_kernel1() {
}
; GCN: a_kernel2:
-; GCN-NEXT: s_endpgm
-; GCN-ASM-NEXT: [[END_LABEL2:\.Lfunc_end.*]]:
+; GCN: s_endpgm
+; GCN-ASM: [[END_LABEL2:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_kernel2, [[END_LABEL2]]-a_kernel2
-; GCN-ASM: .section .AMDGPU.config
-; GCN-OBJ-NEXT: {{^$}}
+; GCN-OBJ: {{^$}}
define amdgpu_kernel void @a_kernel2() {
ret void
}
-; GCN-ASM: .text
-; GCN-ASM-NEXT: .globl a_function
+; GCN-ASM: .globl a_function
; GCN-ASM-NEXT: .p2align 2
; GCN-ASM-NEXT: .type a_function,@function
@@ -34,46 +34,48 @@ define amdgpu_kernel void @a_kernel2() {
; GCN: s_setpc_b64
; GCN-ASM-NEXT: [[END_LABEL3:\.Lfunc_end.*]]:
; GCN-ASM-NEXT: .size a_function, [[END_LABEL3]]-a_function
-; GFX10-ASM: .p2alignl 6, 3214868480
-; GFX10-ASM-NEXT: .fill 32, 4, 3214868480
+; GFX10END-ASM: .p2alignl 6, 3214868480
+; GFX10END-ASM-NEXT: .fill 32, 4, 3214868480
+; GFX10NOEND-NOT: .fill
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10NOEND-OBJ-NOT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
-; GFX10-OBJ: s_code_end // 000000000140:
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10END-OBJ: s_code_end // 000000000140:
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
-; GFX10-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
+; GFX10END-OBJ-NEXT: s_code_end
define void @a_function() {
ret void
OpenPOWER on IntegriCloud