diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-11 22:29:31 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-11 22:29:31 +0000 |
commit | 9ac40026ddc7e759287f0a5cffa88ea8a1fb9d3e (patch) | |
tree | 4f4ccd780aa94ccb0d3ca872535891d8aa8d56b2 /llvm | |
parent | efa9f4b2107f5f89dbc78b355fe2614cdc541f85 (diff) | |
download | bcm5719-llvm-9ac40026ddc7e759287f0a5cffa88ea8a1fb9d3e.tar.gz bcm5719-llvm-9ac40026ddc7e759287f0a5cffa88ea8a1fb9d3e.zip |
AMDGPU: Insert wait at start of callee functions
llvm-svn: 300000
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 14 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-func.ll | 2 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir | 25 |
3 files changed, 40 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index b14ef3ad361..47257ce16ce 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -690,5 +690,19 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { for (MachineInstr *I : RemoveMI) I->eraseFromParent(); + if (!MFI->isEntryFunction()) { + // Wait for any outstanding memory operations that the input registers may + // depend on. We can't track them and it's better to do the wait after the + // costly call sequence. + + // TODO: Could insert earlier and schedule more liberally with operations + // that only use caller preserved registers. + MachineBasicBlock &EntryBB = MF.front(); + BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT)) + .addImm(0); + + Changes = true; + } + return Changes; } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll index d9662b69b12..b4cdd4030d8 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -26,7 +26,7 @@ ; ELF: Symbol { ; ELF: Name: simple -; ELF: Size: 288 +; ELF: Size: 292 ; ELF: Type: Function (0x2) ; ELF: } diff --git a/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir b/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir new file mode 100644 index 00000000000..ad7cd0cc8ab --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-waits-callee.mir @@ -0,0 +1,25 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s +--- | + define float @entry_callee_wait(float %arg) #0 { + ret float %arg + } + + attributes #0 = { nounwind } +... 
+--- +# CHECK-LABEL: name: entry_callee_wait{{$}} +# CHECK: bb.0: +# CHECK-NEXT: S_WAITCNT 0{{$}} +# CHECK-NEXT: V_ADD_F32 +# CHECK-NEXT: S_SETPC_B64 +liveins: + - { reg: '%sgpr0_sgpr1' } + - { reg: '%vgpr0' } + +name: entry_callee_wait +body: | + bb.0: + %vgpr0 = V_ADD_F32_e32 %vgpr0, %vgpr0, implicit %exec + S_SETPC_B64 killed %sgpr0_sgpr1 + +... |