summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Jan Vesely <jan.vesely@rutgers.edu> 2017-01-04 18:06:55 +0000
committer: Jan Vesely <jan.vesely@rutgers.edu> 2017-01-04 18:06:55 +0000
commit: d48445d51392216978833ebe42751011b523b912 (patch)
tree: 655318d04b516209f1e8ff3b365038cbd5f9f9df
parent: 020b623a3b03a89aa56beba357bfa27c36cf13a7 (diff)
download: bcm5719-llvm-d48445d51392216978833ebe42751011b523b912.tar.gz
          bcm5719-llvm-d48445d51392216978833ebe42751011b523b912.zip
AMDGPU/SI: Implement sendmsghalt intrinsic
v2: expose using amdgcn prefix

Differential Revision: https://reviews.llvm.org/D23511

llvm-svn: 290977
-rw-r--r-- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 4
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 5
-rw-r--r-- llvm/lib/Target/AMDGPU/SOPInstructions.td | 5
-rw-r--r-- llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll | 41
-rw-r--r-- llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll | 161
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll | 17
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll | 24
11 files changed, 230 insertions, 45 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 078959ce15d..07d5b5ea40d 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id :
// Instruction Intrinsics
//===----------------------------------------------------------------------===//
+// The first parameter is s_sendmsg immediate (i16),
+// the second one is copied to m0
+def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">,
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
+ Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>;
+
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a87204d46ea..0b0a0e7d083 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
+ NODE_NAME_CASE(SENDMSGHALT)
NODE_NAME_CASE(INTERP_MOV)
NODE_NAME_CASE(INTERP_P1)
NODE_NAME_CASE(INTERP_P2)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 5cc5efb331e..745c9923de2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -313,6 +313,7 @@ enum NodeType : unsigned {
/// Pointer to the start of the shader's constant data.
CONST_DATA_PTR,
SENDMSG,
+ SENDMSGHALT,
INTERP_MOV,
INTERP_P1,
INTERP_P2,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index e7b40016e27..f079c8d0c70 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
[SDNPHasChain, SDNPInGlue]>;
+def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT",
+ SDTypeProfile<0, 1, [SDTCisInt<0>]>,
+ [SDNPHasChain, SDNPInGlue]>;
+
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
[SDNPInGlue]>;
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fa53831cbe1..c78e97dfd46 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
switch (IntrinsicID) {
- case AMDGPUIntrinsic::SI_sendmsg: {
+ case AMDGPUIntrinsic::SI_sendmsg:
+ case Intrinsic::amdgcn_s_sendmsg: {
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
SDValue Glue = Chain.getValue(1);
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_s_sendmsghalt: {
+ Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain,
+ Op.getOperand(2), Glue);
+ }
case AMDGPUIntrinsic::SI_tbuffer_store: {
SDValue Ops[] = {
Chain,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index 202a1e9ed8a..fceabd7a8fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
return;
// There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG.
- if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) {
+ if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) {
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
LastInstWritesM0 = false;
return;
@@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
ST->needWaitcntBeforeBarrier()) ||
- I->getOpcode() == AMDGPU::S_SENDMSG)
+ I->getOpcode() == AMDGPU::S_SENDMSG ||
+ I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
else
Required = handleOperands(*I);
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0aeb1297d3a..73cd5774128 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in {
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
[(AMDGPUsendmsg (i32 imm:$simm16))]
>;
+
+def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16",
+ [(AMDGPUsendmsghalt (i32 imm:$simm16))]
+>;
} // End Uses = [EXEC, M0]
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
let simm16 = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll
new file mode 100644
index 00000000000..8d8885852af
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll
@@ -0,0 +1,41 @@
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}main:
+; GCN: s_mov_b32 m0, s0
+; VI-NEXT: s_nop 0
+; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
+; GCN-NEXT: s_endpgm
+
+define amdgpu_gs void @main(i32 inreg %a) #0 {
+ call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a)
+ ret void
+}
+
+; GCN-LABEL: {{^}}main_halt:
+; GCN: s_mov_b32 m0, s0
+; VI-NEXT: s_nop 0
+; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT)
+; GCN-NEXT: s_endpgm
+
+define void @main_halt(i32 inreg %a) #0 {
+ call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a)
+ ret void
+}
+
+; GCN-LABEL: {{^}}legacy:
+; GCN: s_mov_b32 m0, s0
+; VI-NEXT: s_nop 0
+; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
+; GCN-NEXT: s_endpgm
+
+define amdgpu_gs void @legacy(i32 inreg %a) #0 {
+ call void @llvm.SI.sendmsg(i32 3, i32 %a)
+ ret void
+}
+
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
+declare void @llvm.SI.sendmsg(i32, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll
new file mode 100644
index 00000000000..31f9cfca6de
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll
@@ -0,0 +1,161 @@
+;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
+
+; CHECK-LABEL: {{^}}test_interrupt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT)
+define void @test_interrupt() {
+body:
+ call void @llvm.amdgcn.s.sendmsg(i32 1, i32 0);
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_emit:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
+define void @test_gs_emit() {
+body:
+ call void @llvm.amdgcn.s.sendmsg(i32 34, i32 0);
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_cut:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
+define void @test_gs_cut() {
+body:
+ call void @llvm.amdgcn.s.sendmsg(i32 274, i32 0);
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_emit_cut:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
+define void @test_gs_emit_cut() {
+body:
+ call void @llvm.amdgcn.s.sendmsg(i32 562, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_done:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define void @test_gs_done() {
+body:
+ call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0)
+ ret void
+}
+
+
+; CHECK-LABEL: {{^}}test_interrupt_halt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsghalt sendmsg(MSG_INTERRUPT)
+define void @test_interrupt_halt() {
+body:
+ call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_emit_halt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT, 0)
+define void @test_gs_emit_halt() {
+body:
+ call void @llvm.amdgcn.s.sendmsghalt(i32 34, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_cut_halt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_CUT, 1)
+define void @test_gs_cut_halt() {
+body:
+ call void @llvm.amdgcn.s.sendmsghalt(i32 274, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_emit_cut_halt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
+define void @test_gs_emit_cut_halt() {
+body:
+ call void @llvm.amdgcn.s.sendmsghalt(i32 562, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_gs_done_halt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsghalt sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define void @test_gs_done_halt() {
+body:
+ call void @llvm.amdgcn.s.sendmsghalt(i32 3, i32 0)
+ ret void
+}
+
+; Legacy
+; CHECK-LABEL: {{^}}test_legacy_interrupt:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT)
+define void @test_legacy_interrupt() {
+body:
+ call void @llvm.SI.sendmsg(i32 1, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_legacy_gs_emit:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
+define void @test_legacy_gs_emit() {
+body:
+ call void @llvm.SI.sendmsg(i32 34, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_legacy_gs_cut:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
+define void @test_legacy_gs_cut() {
+body:
+ call void @llvm.SI.sendmsg(i32 274, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_legacy_gs_emit_cut:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
+define void @test_legacy_gs_emit_cut() {
+body:
+ call void @llvm.SI.sendmsg(i32 562, i32 0)
+ ret void
+}
+
+; CHECK-LABEL: {{^}}test_legacy_gs_done:
+; CHECK: s_mov_b32 m0, 0
+; CHECK-NOT: s_mov_b32 m0
+; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
+define void @test_legacy_gs_done() {
+body:
+ call void @llvm.SI.sendmsg(i32 3, i32 0)
+ ret void
+}
+
+; Function Attrs: nounwind
+declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0
+declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0
+declare void @llvm.SI.sendmsg(i32, i32) #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
deleted file mode 100644
index 2d4987643a2..00000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s
-
-; GCN-LABEL: {{^}}main:
-; GCN: s_mov_b32 m0, s0
-; VI-NEXT: s_nop 0
-; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP)
-; GCN-NEXT: s_endpgm
-
-define amdgpu_gs void @main(i32 inreg %a) #0 {
- call void @llvm.SI.sendmsg(i32 3, i32 %a)
- ret void
-}
-
-declare void @llvm.SI.sendmsg(i32, i32) #0
-
-attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll
deleted file mode 100644
index c4bb27676e7..00000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: s_mov_b32 m0, 0
-; CHECK-NOT: s_mov_b32 m0
-; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
-; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1)
-; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2)
-; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
-
-define void @main() {
-main_body:
- call void @llvm.SI.sendmsg(i32 34, i32 0);
- call void @llvm.SI.sendmsg(i32 274, i32 0);
- call void @llvm.SI.sendmsg(i32 562, i32 0);
- call void @llvm.SI.sendmsg(i32 3, i32 0);
- ret void
-}
-
-; Function Attrs: nounwind
-declare void @llvm.SI.sendmsg(i32, i32) #0
-
-attributes #0 = { nounwind }
OpenPOWER on IntegriCloud