diff options
author | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-12 17:52:51 +0000 |
---|---|---|
committer | Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com> | 2019-06-12 17:52:51 +0000 |
commit | 5f581c9f08e6cbb22002371caa2d1f23337a9054 (patch) | |
tree | 5e351d3b7fc362ee39bcb36ac3244a18facb6617 /llvm/lib | |
parent | efc0d1a29801adc95450b9a2129f213d15c23164 (diff) | |
download | bcm5719-llvm-5f581c9f08e6cbb22002371caa2d1f23337a9054.tar.gz bcm5719-llvm-5f581c9f08e6cbb22002371caa2d1f23337a9054.zip |
[AMDGPU] gfx1010 premlane instructions
Differential Revision: https://reviews.llvm.org/D63202
llvm-svn: 363185
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 19 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 44 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 12 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 20 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/VOP3Instructions.td | 28 |
7 files changed, 142 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 37879520ec0..bc4fe3dd522 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1148,6 +1148,7 @@ private: bool validateMIMGD16(const MCInst &Inst); bool validateMIMGDim(const MCInst &Inst); bool validateLdsDirect(const MCInst &Inst); + bool validateOpSel(const MCInst &Inst); bool validateVOP3Literal(const MCInst &Inst) const; bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; @@ -3003,6 +3004,19 @@ bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { return NumLiterals <= 1; } +bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { + const unsigned Opc = Inst.getOpcode(); + if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || + Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { + int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); + unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); + + if (OpSel & ~3) + return false; + } + return true; +} + // VOP3 literal is only allowed in GFX10+ and only one can be used bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { unsigned Opcode = Inst.getOpcode(); @@ -3071,6 +3085,11 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, "integer clamping is not supported on this GPU"); return false; } + if (!validateOpSel(Inst)) { + Error(IDLoc, + "invalid op_sel operand"); + return false; + } // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. if (!validateMIMGD16(Inst)) { Error(IDLoc, diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index 87a8c06e697..f19fa684629 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -115,6 +115,12 @@ static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII, } } +static bool isPermlane(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + return Opcode == AMDGPU::V_PERMLANE16_B32 || + Opcode == AMDGPU::V_PERMLANEX16_B32; +} + static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) { const MachineOperand *RegOp = TII->getNamedOperand(RegInstr, AMDGPU::OpName::simm16); @@ -835,11 +841,49 @@ int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) { void GCNHazardRecognizer::fixHazards(MachineInstr *MI) { fixVMEMtoScalarWriteHazards(MI); + fixVcmpxPermlaneHazards(MI); fixSMEMtoVectorWriteHazards(MI); fixVcmpxExecWARHazard(MI); fixLdsBranchVmemWARHazard(MI); } +bool GCNHazardRecognizer::fixVcmpxPermlaneHazards(MachineInstr *MI) { + if (!ST.hasVcmpxPermlaneHazard() || !isPermlane(*MI)) + return false; + + const SIInstrInfo *TII = ST.getInstrInfo(); + auto IsHazardFn = [TII] (MachineInstr *MI) { + return TII->isVOPC(*MI); + }; + + auto IsExpiredFn = [] (MachineInstr *MI, int) { + if (!MI) + return false; + unsigned Opc = MI->getOpcode(); + return SIInstrInfo::isVALU(*MI) && + Opc != AMDGPU::V_NOP_e32 && + Opc != AMDGPU::V_NOP_e64 && + Opc != AMDGPU::V_NOP_sdwa; + }; + + if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == + std::numeric_limits<int>::max()) + return false; + + // V_NOP will be discarded by SQ. + // Use V_MOB_B32 v?, v?. Register must be alive so use src0 of V_PERMLANE* + // which is always a VGPR and available. + auto *Src0 = TII->getNamedOperand(*MI, AMDGPU::OpName::src0); + unsigned Reg = Src0->getReg(); + bool IsUndef = Src0->isUndef(); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(AMDGPU::V_MOV_B32_e32)) + .addReg(Reg, RegState::Define | (IsUndef ? RegState::Dead : 0)) + .addReg(Reg, IsUndef ? RegState::Undef : RegState::Kill); + + return true; +} + bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) { if (!ST.hasVMEMtoScalarWriteHazard()) return false; diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h index bf55976934e..0c4c9d9d982 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h @@ -84,8 +84,8 @@ private: int checkAnyInstHazards(MachineInstr *MI); int checkReadM0Hazards(MachineInstr *SMovRel); int checkNSAtoVMEMHazard(MachineInstr *MI); - void fixHazards(MachineInstr *MI); + bool fixVcmpxPermlaneHazards(MachineInstr *MI); bool fixVMEMtoScalarWriteHazards(MachineInstr *MI); bool fixSMEMtoVectorWriteHazards(MachineInstr *MI); bool fixVcmpxExecWARHazard(MachineInstr *MI); diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp index 0e2706349b1..114f98cbfaa 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp @@ -1005,6 +1005,18 @@ void AMDGPUInstPrinter::printPackedModifier(const MCInst *MI, void AMDGPUInstPrinter::printOpSel(const MCInst *MI, unsigned, const MCSubtargetInfo &STI, raw_ostream &O) { + unsigned Opc = MI->getOpcode(); + if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || + Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { + auto FIN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); + auto BCN = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1_modifiers); + unsigned FI = !!(MI->getOperand(FIN).getImm() & SISrcMods::OP_SEL_0); + unsigned BC = !!(MI->getOperand(BCN).getImm() & SISrcMods::OP_SEL_0); + if (FI || BC) + O << " op_sel:[" << FI << ',' << BC << ']'; + return; + } + printPackedModifier(MI, " op_sel:[", SISrcMods::OP_SEL_0, O); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index c444c08487a..3bb1ddc6703 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9721,6 +9721,24 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, Ops.push_back(ImpDef.getValue(1)); return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } + case AMDGPU::V_PERMLANE16_B32: + case AMDGPU::V_PERMLANEX16_B32: { + ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0)); + ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2)); + if (!FI->getZExtValue() && !BC->getZExtValue()) + break; + SDValue VDstIn = Node->getOperand(6); + if (VDstIn.isMachineOpcode() + && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) + break; + MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, + SDLoc(Node), MVT::i32); + SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1), + SDValue(BC, 0), Node->getOperand(3), + Node->getOperand(4), Node->getOperand(5), + SDValue(ImpDef, 0), Node->getOperand(7) }; + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); + } default: break; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 775b35d6521..dc35bf4e3eb 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3790,6 +3790,26 @@ void SIInstrInfo::legalizeOperandsVOP3(MachineRegisterInfo &MRI, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2) }; + if (Opc == AMDGPU::V_PERMLANE16_B32 || + Opc == AMDGPU::V_PERMLANEX16_B32) { + // src1 and src2 must be scalar + MachineOperand &Src1 = MI.getOperand(VOP3Idx[1]); + MachineOperand &Src2 = MI.getOperand(VOP3Idx[2]); + const DebugLoc &DL = MI.getDebugLoc(); + if (Src1.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src1.getReg()))) { + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src1); + Src1.ChangeToRegister(Reg, false); + } + if (Src2.isReg() && !RI.isSGPRClass(MRI.getRegClass(Src2.getReg()))) { + unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass); + BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg) + .add(Src2); + Src2.ChangeToRegister(Reg, false); + } + } + // Find the one SGPR operand we are allowed to use. int ConstantBusLimit = ST.getConstantBusLimit(Opc); int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0; diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 4129aac4d58..a15c0571ae7 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -625,9 +625,35 @@ def : ThreeOp_i32_Pats<xor, add, V_XAD_U32>; } // End SubtargetPredicate = isGFX9Plus +def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3_OPSEL> { + let Src0RC64 = VRegSrc_32; + let Src1RC64 = SCSrc_b32; + let Src2RC64 = SCSrc_b32; + let InsVOP3OpSel = (ins IntOpSelMods:$src0_modifiers, VRegSrc_32:$src0, + IntOpSelMods:$src1_modifiers, SCSrc_b32:$src1, + IntOpSelMods:$src2_modifiers, SCSrc_b32:$src2, + VGPR_32:$vdst_in, op_sel:$op_sel); + let HasClamp = 0; + let HasOMod = 0; +} + let SubtargetPredicate = isGFX10Plus in { def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>; def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>; + + let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in { + def V_PERMLANE16_B32 : VOP3Inst <"v_permlane16_b32", VOP3_PERMLANE_Profile>; + def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>; + } // End $vdst = $vdst_in, DisableEncoding $vdst_in + + def : GCNPat< + (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc), + (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) + >; + def : GCNPat< + (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, imm:$fi, imm:$bc), + (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) + >; } // End SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===// @@ -790,6 +816,8 @@ defm V_MAX_I16 : VOP3_Real_gfx10_with_name<0x30a, "V_MAX_I16_e64", "v_ma defm V_MIN_U16 : VOP3_Real_gfx10_with_name<0x30b, "V_MIN_U16_e64", "v_min_u16">; defm V_MIN_I16 : VOP3_Real_gfx10_with_name<0x30c, "V_MIN_I16_e64", "v_min_i16">; defm V_LSHLREV_B16 : VOP3_Real_gfx10_with_name<0x314, "V_LSHLREV_B16_e64", "v_lshlrev_b16">; +defm V_PERMLANE16_B32 : VOP3OpSel_Real_gfx10<0x377>; +defm V_PERMLANEX16_B32 : VOP3OpSel_Real_gfx10<0x378>; //===----------------------------------------------------------------------===// // GFX7, GFX10. |