summary | refs | log | tree | commit | diff | stats
path: root/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/call-skip.ll | 67
2 files changed, 68 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0e87b4354f2..dd7f173e44d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2486,7 +2486,7 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
Opcode == AMDGPU::DS_ORDERED_COUNT)
return true;
- if (MI.isInlineAsm())
+ if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
// These are like SALU instructions in terms of effects, so it's questionable
diff --git a/llvm/test/CodeGen/AMDGPU/call-skip.ll b/llvm/test/CodeGen/AMDGPU/call-skip.ll
new file mode 100644
index 00000000000..cd963df6c49
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/call-skip.ll
@@ -0,0 +1,67 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; A call should be skipped if no lanes are active (EXEC is zero), since we
+; don't know what side effects inside the callee would need to be avoided.
+define hidden void @func() #1 {
+ ret void
+}
+
+; GCN-LABEL: {{^}}if_call:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: s_swappc_b64
+; GCN: [[END]]:
+define void @if_call(i32 %flag) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %call, label %end
+
+call:
+ call void @func()
+ br label %end
+
+end:
+ ret void
+}
+
+; GCN-LABEL: {{^}}if_asm:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: ; sample asm
+; GCN: [[END]]:
+define void @if_asm(i32 %flag) #0 {
+ %cc = icmp eq i32 %flag, 0
+ br i1 %cc, label %call, label %end
+
+call:
+ call void asm sideeffect "; sample asm", ""()
+ br label %end
+
+end:
+ ret void
+}
+
+; GCN-LABEL: {{^}}if_call_kernel:
+; GCN: s_and_saveexec_b64
+; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_execz [[END]]
+; GCN: s_swappc_b64
+define amdgpu_kernel void @if_call_kernel() #0 {
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ %cc = icmp eq i32 %id, 0
+ br i1 %cc, label %call, label %end
+
+call:
+ call void @func()
+ br label %end
+
+end:
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind noinline }
+attributes #2 = { nounwind readnone speculatable }
OpenPOWER on IntegriCloud