diff options
author | Jan Vesely <jan.vesely@rutgers.edu> | 2017-01-04 18:06:55 +0000 |
---|---|---|
committer | Jan Vesely <jan.vesely@rutgers.edu> | 2017-01-04 18:06:55 +0000 |
commit | d48445d51392216978833ebe42751011b523b912 (patch) | |
tree | 655318d04b516209f1e8ff3b365038cbd5f9f9df | |
parent | 020b623a3b03a89aa56beba357bfa27c36cf13a7 (diff) | |
download | bcm5719-llvm-d48445d51392216978833ebe42751011b523b912.tar.gz bcm5719-llvm-d48445d51392216978833ebe42751011b523b912.zip |
AMDGPU/SI: Implement sendmsghalt intrinsic
v2: expose using amdgcn prefix
Differential Revision: https://reviews.llvm.org/D23511
llvm-svn: 290977
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 9 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 5 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SOPInstructions.td | 5 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll | 41 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll | 161 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll | 24 |
11 files changed, 230 insertions, 45 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 078959ce15d..07d5b5ea40d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -104,6 +104,13 @@ def int_amdgcn_dispatch_id : // Instruction Intrinsics //===----------------------------------------------------------------------===// +// The first parameter is s_sendmsg immediate (i16), +// the second one is copied to m0 +def int_amdgcn_s_sendmsg : GCCBuiltin<"__builtin_amdgcn_s_sendmsg">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; +def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">, + Intrinsic <[], [llvm_i32_ty, llvm_i32_ty], []>; + def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">, Intrinsic<[], [], [IntrConvergent]>; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a87204d46ea..0b0a0e7d083 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3048,6 +3048,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(KILL) case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; NODE_NAME_CASE(SENDMSG) + NODE_NAME_CASE(SENDMSGHALT) NODE_NAME_CASE(INTERP_MOV) NODE_NAME_CASE(INTERP_P1) NODE_NAME_CASE(INTERP_P2) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 5cc5efb331e..745c9923de2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -313,6 +313,7 @@ enum NodeType : unsigned { /// Pointer to the start of the shader's constant data. CONST_DATA_PTR, SENDMSG, + SENDMSGHALT, INTERP_MOV, INTERP_P1, INTERP_P2, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index e7b40016e27..f079c8d0c70 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -266,6 +266,10 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG", SDTypeProfile<0, 1, [SDTCisInt<0>]>, [SDNPHasChain, SDNPInGlue]>; +def AMDGPUsendmsghalt : SDNode<"AMDGPUISD::SENDMSGHALT", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain, SDNPInGlue]>; + def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV", SDTypeProfile<1, 3, [SDTCisFP<0>]>, [SDNPInGlue]>; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index fa53831cbe1..c78e97dfd46 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2706,12 +2706,19 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); switch (IntrinsicID) { - case AMDGPUIntrinsic::SI_sendmsg: { + case AMDGPUIntrinsic::SI_sendmsg: + case Intrinsic::amdgcn_s_sendmsg: { Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); SDValue Glue = Chain.getValue(1); return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain, Op.getOperand(2), Glue); } + case Intrinsic::amdgcn_s_sendmsghalt: { + Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3)); + SDValue Glue = Chain.getValue(1); + return DAG.getNode(AMDGPUISD::SENDMSGHALT, DL, MVT::Other, Chain, + Op.getOperand(2), Glue); + } case AMDGPUIntrinsic::SI_tbuffer_store: { SDValue Ops[] = { Chain, diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 202a1e9ed8a..fceabd7a8fd 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -504,7 +504,7 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB, return; // There must be "S_NOP 0" between an instruction writing M0 and S_SENDMSG. - if (LastInstWritesM0 && I->getOpcode() == AMDGPU::S_SENDMSG) { + if (LastInstWritesM0 && (I->getOpcode() == AMDGPU::S_SENDMSG || I->getOpcode() == AMDGPU::S_SENDMSGHALT)) { BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0); LastInstWritesM0 = false; return; @@ -619,7 +619,8 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { // signalling other hardware blocks if ((I->getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) || - I->getOpcode() == AMDGPU::S_SENDMSG) + I->getOpcode() == AMDGPU::S_SENDMSG || + I->getOpcode() == AMDGPU::S_SENDMSGHALT) Required = LastIssued; else Required = handleOperands(*I); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 0aeb1297d3a..73cd5774128 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -828,9 +828,12 @@ let Uses = [EXEC, M0] in { def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16", [(AMDGPUsendmsg (i32 imm:$simm16))] >; + +def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16", + [(AMDGPUsendmsghalt (i32 imm:$simm16))] +>; } // End Uses = [EXEC, M0] -def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">; def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">; def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> { let simm16 = 0; diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll new file mode 100644 index 00000000000..8d8885852af --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg-m0.ll @@ -0,0 +1,41 @@ +; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s + +; GCN-LABEL: {{^}}main: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) +; GCN-NEXT: s_endpgm + +define amdgpu_gs void @main(i32 inreg %a) #0 { + call void @llvm.amdgcn.s.sendmsg(i32 3, i32 %a) + ret void +} + +; GCN-LABEL: {{^}}main_halt: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: s_sendmsghalt sendmsg(MSG_INTERRUPT) +; GCN-NEXT: s_endpgm + +define void @main_halt(i32 inreg %a) #0 { + call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 %a) + ret void +} + +; GCN-LABEL: {{^}}legacy: +; GCN: s_mov_b32 m0, s0 +; VI-NEXT: s_nop 0 +; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) +; GCN-NEXT: s_endpgm + +define amdgpu_gs void @legacy(i32 inreg %a) #0 { + call void @llvm.SI.sendmsg(i32 3, i32 %a) + ret void +} + +declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0 +declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0 +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll new file mode 100644 index 00000000000..31f9cfca6de --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.sendmsg.ll @@ -0,0 +1,161 @@ +;RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s +;RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s + +; CHECK-LABEL: {{^}}test_interrupt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT) +define void @test_interrupt() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 1, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_gs_emit() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 34, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_gs_cut() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 274, i32 0); + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void @test_gs_emit_cut() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_done: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_gs_done() { +body: + call void @llvm.amdgcn.s.sendmsg(i32 3, i32 0) + ret void +} + + +; CHECK-LABEL: {{^}}test_interrupt_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_INTERRUPT) +define void @test_interrupt_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 1, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_gs_emit_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 34, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_cut_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_gs_cut_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 274, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_emit_cut_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void @test_gs_emit_cut_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_gs_done_halt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsghalt sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_gs_done_halt() { +body: + call void @llvm.amdgcn.s.sendmsghalt(i32 3, i32 0) + ret void +} + +; Legacy +; CHECK-LABEL: {{^}}test_legacy_interrupt: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_INTERRUPT) +define void @test_legacy_interrupt() { +body: + call void @llvm.SI.sendmsg(i32 1, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_emit: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) +define void @test_legacy_gs_emit() { +body: + call void @llvm.SI.sendmsg(i32 34, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) +define void @test_legacy_gs_cut() { +body: + call void @llvm.SI.sendmsg(i32 274, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_emit_cut: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) +define void @test_legacy_gs_emit_cut() { +body: + call void @llvm.SI.sendmsg(i32 562, i32 0) + ret void +} + +; CHECK-LABEL: {{^}}test_legacy_gs_done: +; CHECK: s_mov_b32 m0, 0 +; CHECK-NOT: s_mov_b32 m0 +; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) +define void @test_legacy_gs_done() { +body: + call void @llvm.SI.sendmsg(i32 3, i32 0) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.amdgcn.s.sendmsg(i32, i32) #0 +declare void @llvm.amdgcn.s.sendmsghalt(i32, i32) #0 +declare void @llvm.SI.sendmsg(i32, i32) #0 + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll deleted file mode 100644 index 2d4987643a2..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg-m0.ll +++ /dev/null @@ -1,17 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN %s - -; GCN-LABEL: {{^}}main: -; GCN: s_mov_b32 m0, s0 -; VI-NEXT: s_nop 0 -; GCN-NEXT: sendmsg(MSG_GS_DONE, GS_OP_NOP) -; GCN-NEXT: s_endpgm - -define amdgpu_gs void @main(i32 inreg %a) #0 { - call void @llvm.SI.sendmsg(i32 3, i32 %a) - ret void -} - -declare void @llvm.SI.sendmsg(i32, i32) #0 - -attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll deleted file mode 100644 index c4bb27676e7..00000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.SI.sendmsg.ll +++ /dev/null @@ -1,24 +0,0 @@ -;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s -;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s - -; CHECK-LABEL: {{^}}main: -; CHECK: s_mov_b32 m0, 0 -; CHECK-NOT: s_mov_b32 m0 -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0) -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_CUT, 1) -; CHECK: s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT_CUT, 2) -; CHECK: s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP) - -define void @main() { -main_body: - call void @llvm.SI.sendmsg(i32 34, i32 0); - call void @llvm.SI.sendmsg(i32 274, i32 0); - call void @llvm.SI.sendmsg(i32 562, i32 0); - call void @llvm.SI.sendmsg(i32 3, i32 0); - ret void -} - -; Function Attrs: nounwind -declare void @llvm.SI.sendmsg(i32, i32) #0 - -attributes #0 = { nounwind } |