summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaits.cpp14
-rw-r--r--llvm/test/CodeGen/AMDGPU/hsa-func.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir25
3 files changed, 40 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index b14ef3ad361..47257ce16ce 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to do the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Changes = true;
+ }
+
return Changes;
}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll
index d9662b69b12..b4cdd4030d8 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll
@@ -26,7 +26,7 @@
; ELF: Symbol {
; ELF: Name: simple
-; ELF: Size: 288
+; ELF: Size: 292
; ELF: Type: Function (0x2)
; ELF: }
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir b/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir
new file mode 100644
index 00000000000..ad7cd0cc8ab
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir
@@ -0,0 +1,25 @@
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
+--- |
+ define float @entry_callee_wait(float %arg) #0 {
+ ret float %arg
+ }
+
+ attributes #0 = { nounwind }
+...
+---
+# CHECK-LABEL: name: entry_callee_wait{{$}}
+# CHECK: bb.0:
+# CHECK-NEXT: S_WAITCNT 0{{$}}
+# CHECK-NEXT: V_ADD_F32
+# CHECK-NEXT: S_SETPC_B64
+liveins:
+ - { reg: '%sgpr0_sgpr1' }
+ - { reg: '%vgpr0' }
+
+name: entry_callee_wait
+body: |
+ bb.0:
+ %vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec
+ S_SETPC_B64 killed %sgpr0_sgpr1
+
+...
OpenPOWER on IntegriCloud