diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 55 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIIntrinsics.td | 15 |
14 files changed, 156 insertions, 73 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 93dcd728a0c..23a783e7612 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2999,6 +2999,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(EXPORT_DONE) + NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 6c6fc2eed3b..965d4d14190 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -280,7 +280,9 @@ enum NodeType : unsigned { MUL_LOHI_I24, MUL_LOHI_U24, TEXTURE_FETCH, - EXPORT, + EXPORT, // exp on SI+ + EXPORT_DONE, // exp on SI+ with done bit set + R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 4bccd81b550..c8d1bfb1b78 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -265,9 +265,35 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2", SDTypeProfile<1, 4, [SDTCisFP<0>]>, [SDNPInGlue]>; + def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT, [SDNPHasChain, SDNPSideEffect]>; +// SI+ export +def AMDGPUExportOp : SDTypeProfile<0, 8, [ + SDTCisInt<0>, // i8 en + SDTCisInt<1>, // i1 vm + // skip done + SDTCisInt<2>, // i8 tgt + SDTCisSameAs<3, 1>, // i1 compr + SDTCisFP<4>, // f32 src0 + SDTCisSameAs<5, 4>, // f32 src1 + SDTCisSameAs<6, 4>, // f32 src2 + SDTCisSameAs<7, 4> // f32 src3 +]>; + +def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore]>; + +def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + + +def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Flow Control Profile Types //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 35e6c9d036b..9a0d2c167ae 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -443,7 +443,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(2, DL, MVT::i32), // SWZ_Z DAG.getConstant(3, DL, MVT::i32) // SWZ_W }; - return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); } // default for switch(IntrinsicID) @@ -1882,7 +1882,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } - case AMDGPUISD::EXPORT: { + case AMDGPUISD::R600_EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; @@ -1898,7 +1898,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(7) // SWZ_W }; NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); - return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index f84372947d9..3a72e0791fd 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -436,11 +436,6 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions //===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; - -def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, - [SDNPHasChain, SDNPSideEffect]>; - class ExportWord0 { field bits<32> Word0; @@ -486,7 +481,7 @@ class ExportBufWord1 { } multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 5e6e754b532..479c6fc2148 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -15,7 +15,7 @@ namespace SIInstrFlags { // This needs to be kept in sync with the field bits in InstSI. -enum { +enum : uint32_t { SALU = 1 << 3, VALU = 1 << 4, @@ -38,15 +38,16 @@ enum { DS = 1 << 19, MIMG = 1 << 20, FLAT = 1 << 21, - WQM = 1 << 22, - VGPRSpill = 1 << 23, - SGPRSpill = 1 << 24, - VOPAsmPrefer32Bit = 1 << 25, - Gather4 = 1 << 26, - DisableWQM = 1 << 27, - SOPK_ZEXT = 1 << 28, - SCALAR_STORE = 1 << 29, - FIXED_SIZE = 1 << 30 + EXP = 1 << 22, + WQM = 1 << 23, + VGPRSpill = 1 << 24, + SGPRSpill = 1 << 25, + VOPAsmPrefer32Bit = 1 << 26, + Gather4 = 1 << 27, + DisableWQM = 1 << 28, + SOPK_ZEXT = 1 << 29, + SCALAR_STORE = 1 << 30, + FIXED_SIZE = 1u << 31 }; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ef61fc409f8..64f2c0a24e2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2683,6 +2683,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src); return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast); } + case AMDGPUIntrinsic::SI_export: { + const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2)); + const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3)); + const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4)); + const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5)); + const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6)); + + const SDValue Ops[] = { + Chain, + DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1), + DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1), + Op.getOperand(7), // src0 + Op.getOperand(8), // src1 + Op.getOperand(9), // src2 + Op.getOperand(10) // src3 + }; + + unsigned Opc = Done->isNullValue() ? + AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; + return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + } default: return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 9df0838ea61..91e4bf755c5 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -159,16 +159,15 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { MachineBasicBlock::iterator Insert = SkipBB->begin(); // Exec mask is zero: Export to NULL target... - BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP)) - .addImm(0) + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE)) .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) - .addReg(AMDGPU::VGPR0, RegState::Undef); + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addImm(1) // vm + .addImm(0) // compr + .addImm(0); // en // ... and terminate wavefront. BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 7bec2b66f43..202a1e9ed8a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -195,8 +195,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); // Only consider stores or EXP for EXP_CNT - Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && - (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); + Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore(); // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { @@ -238,9 +237,10 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { if (Op.isDef()) return true; - // For exports all registers are relevant + // For exports all registers are relevant. + // TODO: Skip undef/disabled registers. MachineInstr &MI = *Op.getParent(); - if (MI.getOpcode() == AMDGPU::EXP) + if (TII->isEXP(MI)) return true; // For stores the stored value is also relevant @@ -340,7 +340,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Remember which export instructions we have seen if (Increment.Named.EXP) { - ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2; + ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2; } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index b95f209e270..5f260ba0c85 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,7 @@ class InstSI <dag outs, dag ins, string asm = "", field bit DS = 0; field bit MIMG = 0; field bit FLAT = 0; + field bit EXP = 0; // Whether WQM _must_ be enabled for this instruction. field bit WQM = 0; @@ -96,15 +97,16 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{19} = DS; let TSFlags{20} = MIMG; let TSFlags{21} = FLAT; - let TSFlags{22} = WQM; - let TSFlags{23} = VGPRSpill; - let TSFlags{24} = SGPRSpill; - let TSFlags{25} = VOPAsmPrefer32Bit; - let TSFlags{26} = Gather4; - let TSFlags{27} = DisableWQM; - let TSFlags{28} = SOPKZext; - let TSFlags{29} = ScalarStore; - let TSFlags{30} = FixedSize; + let TSFlags{22} = EXP; + let TSFlags{23} = WQM; + let TSFlags{24} = VGPRSpill; + let TSFlags{25} = SGPRSpill; + let TSFlags{26} = VOPAsmPrefer32Bit; + let TSFlags{27} = Gather4; + let TSFlags{28} = DisableWQM; + let TSFlags{29} = SOPKZext; + let TSFlags{30} = ScalarStore; + let TSFlags{31} = FixedSize; let SchedRW = [Write32Bit]; @@ -232,6 +234,17 @@ class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; } +class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> : + InstSI<outs, ins, asm, pattern> { + let EXP = 1; + let EXP_CNT = 1; + let mayLoad = 0; // Set to 1 if done bit is set. + let mayStore = 1; + let UseNamedOperandTable = 1; + let Uses = [EXEC]; + let SchedRW = [WriteExport]; +} + } // End Uses = [EXEC] class MIMG <dag outs, dag ins, string asm, list<dag> pattern> : diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 71f1968d250..0f16fa0902f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -372,6 +372,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FLAT; } + static bool isEXP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::EXP; + } + + bool isEXP(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::EXP; + } + static bool isWQM(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::WQM; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 0f30d7b4657..39c0821e858 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -518,32 +518,39 @@ class SIMCInstr <string pseudo, int subtarget> { // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< +class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon< (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), - "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - let EXP_CNT = 1; - let Uses = [EXEC]; - let SchedRW = [WriteExport]; -} - -multiclass EXP_m { - - let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ; - } - - def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe { - let DecoderNamespace="SICI"; - let DisableDecoder = DisableSIDecoder; - } + (ins i8imm:$tgt, VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3, + i1imm:$vm, i1imm:$compr, i8imm:$en), + "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", + [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr), + f32:$src0, f32:$src1, f32:$src2, f32:$src3)] +>; - def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi { - let DecoderNamespace="VI"; - let DisableDecoder = DisableVIDecoder; +// Split EXP instruction into EXP and EXP_DONE so we can set +// mayLoad for done=1. +multiclass EXP_m<bit done, SDPatternOperator node> { + let mayLoad = done in { + let isPseudo = 1, isCodeGenOnly = 1 in { + def "" : EXP_Helper<done, node>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>; + } + + let done = done in { + def _si : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>, + EXPe { + let DecoderNamespace = "SICI"; + let DisableDecoder = DisableSIDecoder; + } + + def _vi : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>, + EXPe_vi { + let DecoderNamespace = "VI"; + let DisableDecoder = DisableVIDecoder; + } + } } } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0aa8e19c61b..b24bcd76035 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -37,7 +37,8 @@ let SubtargetPredicate = isGCN in { // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m; +defm EXP : EXP_m<0, AMDGPUexport>; +defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; //===----------------------------------------------------------------------===// // VINTRP Instructions @@ -388,13 +389,6 @@ def : Pat < (SI_KILL (i32 0xbf800000)) >; -def : Pat < - (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - f32:$src0, f32:$src1, f32:$src2, f32:$src3), - (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - $src0, $src1, $src2, $src3) ->; - //===----------------------------------------------------------------------===// // VOP1 Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/llvm/lib/Target/AMDGPU/SIIntrinsics.td index b2857f06cbb..5da37546871 100644 --- a/llvm/lib/Target/AMDGPU/SIIntrinsics.td +++ b/llvm/lib/Target/AMDGPU/SIIntrinsics.td @@ -15,7 +15,20 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + + def int_SI_export : Intrinsic <[], + [llvm_i32_ty, // en + llvm_i32_ty, // vm (FIXME: should be i1) + llvm_i32_ty, // done (FIXME: should be i1) + llvm_i32_ty, // tgt + llvm_i32_ty, // compr (FIXME: should be i1) + llvm_float_ty, // src0 + llvm_float_ty, // src1 + llvm_float_ty, // src2 + llvm_float_ty], // src3 + [] + >; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; |