diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-05 20:23:10 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2016-12-05 20:23:10 +0000 |
commit | 7bee6ac798f2c547753dd867e130ec587f201483 (patch) | |
tree | da0fea7e1f415a0dbada331fc836c3e2ca547240 /llvm/lib/Target | |
parent | df87d070c917029bd0209408fcfe833d149bcca7 (diff) | |
download | bcm5719-llvm-7bee6ac798f2c547753dd867e130ec587f201483.tar.gz bcm5719-llvm-7bee6ac798f2c547753dd867e130ec587f201483.zip |
AMDGPU: Refactor exp instructions
Structure the definitions a bit more like the other classes.
The main change here is to split EXP with the done bit set
to a separate opcode, so we can set mayLoad = 1 so that it won't
be reordered before the other exp stores, since this has the special
constraint that if the done bit is set then this should be the last
exp in the shader.
Previously all exp instructions were inferred to have unmodeled
side effects.
llvm-svn: 288695
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 4 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/R600Instructions.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIDefines.h | 21 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertSkips.cpp | 11 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInsertWaits.cpp | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrFormats.td | 31 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.h | 8 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.td | 55 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstructions.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIIntrinsics.td | 15 |
14 files changed, 156 insertions, 73 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 93dcd728a0c..23a783e7612 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2999,6 +2999,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MAD_I24) NODE_NAME_CASE(TEXTURE_FETCH) NODE_NAME_CASE(EXPORT) + NODE_NAME_CASE(EXPORT_DONE) + NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) NODE_NAME_CASE(REGISTER_STORE) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 6c6fc2eed3b..965d4d14190 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -280,7 +280,9 @@ enum NodeType : unsigned { MUL_LOHI_I24, MUL_LOHI_U24, TEXTURE_FETCH, - EXPORT, + EXPORT, // exp on SI+ + EXPORT_DONE, // exp on SI+ with done bit set + R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, REGISTER_STORE, diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 4bccd81b550..c8d1bfb1b78 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -265,9 +265,35 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2", SDTypeProfile<1, 4, [SDTCisFP<0>]>, [SDNPInGlue]>; + def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT, [SDNPHasChain, SDNPSideEffect]>; +// SI+ export +def AMDGPUExportOp : SDTypeProfile<0, 8, [ + SDTCisInt<0>, // i8 en + SDTCisInt<1>, // i1 vm + // skip done + SDTCisInt<2>, // i8 tgt + SDTCisSameAs<3, 1>, // i1 compr + SDTCisFP<4>, // f32 src0 + SDTCisSameAs<5, 4>, // f32 src1 + SDTCisSameAs<6, 4>, // f32 src2 + SDTCisSameAs<7, 4> // f32 src3 +]>; + +def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, + [SDNPHasChain, SDNPMayStore]>; + +def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, + 
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; + + +def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; + +def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp, + [SDNPHasChain, SDNPSideEffect]>; + //===----------------------------------------------------------------------===// // Flow Control Profile Types //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index 35e6c9d036b..9a0d2c167ae 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -443,7 +443,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const DAG.getConstant(2, DL, MVT::i32), // SWZ_Z DAG.getConstant(3, DL, MVT::i32) // SWZ_W }; - return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args); } // default for switch(IntrinsicID) @@ -1882,7 +1882,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } - case AMDGPUISD::EXPORT: { + case AMDGPUISD::R600_EXPORT: { SDValue Arg = N->getOperand(1); if (Arg.getOpcode() != ISD::BUILD_VECTOR) break; @@ -1898,7 +1898,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N, N->getOperand(7) // SWZ_W }; NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL); - return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs); + return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs); } case AMDGPUISD::TEXTURE_FETCH: { SDValue Arg = N->getOperand(1); diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td index f84372947d9..3a72e0791fd 100644 --- a/llvm/lib/Target/AMDGPU/R600Instructions.td +++ b/llvm/lib/Target/AMDGPU/R600Instructions.td @@ -436,11 +436,6 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>; // Export Instructions 
//===----------------------------------------------------------------------===// -def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; - -def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType, - [SDNPHasChain, SDNPSideEffect]>; - class ExportWord0 { field bits<32> Word0; @@ -486,7 +481,7 @@ class ExportBufWord1 { } multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> { - def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), + def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type), (i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)), (ExportInst R600_Reg128:$src, imm:$type, imm:$base, imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0) diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h index 5e6e754b532..479c6fc2148 100644 --- a/llvm/lib/Target/AMDGPU/SIDefines.h +++ b/llvm/lib/Target/AMDGPU/SIDefines.h @@ -15,7 +15,7 @@ namespace SIInstrFlags { // This needs to be kept in sync with the field bits in InstSI. 
-enum { +enum : uint32_t { SALU = 1 << 3, VALU = 1 << 4, @@ -38,15 +38,16 @@ enum { DS = 1 << 19, MIMG = 1 << 20, FLAT = 1 << 21, - WQM = 1 << 22, - VGPRSpill = 1 << 23, - SGPRSpill = 1 << 24, - VOPAsmPrefer32Bit = 1 << 25, - Gather4 = 1 << 26, - DisableWQM = 1 << 27, - SOPK_ZEXT = 1 << 28, - SCALAR_STORE = 1 << 29, - FIXED_SIZE = 1 << 30 + EXP = 1 << 22, + WQM = 1 << 23, + VGPRSpill = 1 << 24, + SGPRSpill = 1 << 25, + VOPAsmPrefer32Bit = 1 << 26, + Gather4 = 1 << 27, + DisableWQM = 1 << 28, + SOPK_ZEXT = 1 << 29, + SCALAR_STORE = 1 << 30, + FIXED_SIZE = 1u << 31 }; } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index ef61fc409f8..64f2c0a24e2 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2683,6 +2683,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src); return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast); } + case AMDGPUIntrinsic::SI_export: { + const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2)); + const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3)); + const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4)); + const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5)); + const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6)); + + const SDValue Ops[] = { + Chain, + DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), + DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1), + DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), + DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1), + Op.getOperand(7), // src0 + Op.getOperand(8), // src1 + Op.getOperand(9), // src2 + Op.getOperand(10) // src3 + }; + + unsigned Opc = Done->isNullValue() ? 
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; + return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + } default: return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp index 9df0838ea61..91e4bf755c5 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp @@ -159,16 +159,15 @@ bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) { MachineBasicBlock::iterator Insert = SkipBB->begin(); // Exec mask is zero: Export to NULL target... - BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP)) - .addImm(0) + BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE)) .addImm(0x09) // V_008DFC_SQ_EXP_NULL - .addImm(0) - .addImm(1) - .addImm(1) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) .addReg(AMDGPU::VGPR0, RegState::Undef) - .addReg(AMDGPU::VGPR0, RegState::Undef); + .addReg(AMDGPU::VGPR0, RegState::Undef) + .addImm(1) // vm + .addImm(0) // compr + .addImm(0); // en // ... and terminate wavefront. BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM)); diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp index 7bec2b66f43..202a1e9ed8a 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp @@ -195,8 +195,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) { Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT); // Only consider stores or EXP for EXP_CNT - Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT && - (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore())); + Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore(); // LGKM may uses larger values if (TSFlags & SIInstrFlags::LGKM_CNT) { @@ -238,9 +237,10 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) { if (Op.isDef()) return true; - // For exports all registers are relevant + // For exports all registers are relevant. 
+ // TODO: Skip undef/disabled registers. MachineInstr &MI = *Op.getParent(); - if (MI.getOpcode() == AMDGPU::EXP) + if (TII->isEXP(MI)) return true; // For stores the stored value is also relevant @@ -340,7 +340,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB, // Remember which export instructions we have seen if (Increment.Named.EXP) { - ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2; + ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2; } for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td index b95f209e270..5f260ba0c85 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td +++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td @@ -41,6 +41,7 @@ class InstSI <dag outs, dag ins, string asm = "", field bit DS = 0; field bit MIMG = 0; field bit FLAT = 0; + field bit EXP = 0; // Whether WQM _must_ be enabled for this instruction. field bit WQM = 0; @@ -96,15 +97,16 @@ class InstSI <dag outs, dag ins, string asm = "", let TSFlags{19} = DS; let TSFlags{20} = MIMG; let TSFlags{21} = FLAT; - let TSFlags{22} = WQM; - let TSFlags{23} = VGPRSpill; - let TSFlags{24} = SGPRSpill; - let TSFlags{25} = VOPAsmPrefer32Bit; - let TSFlags{26} = Gather4; - let TSFlags{27} = DisableWQM; - let TSFlags{28} = SOPKZext; - let TSFlags{29} = ScalarStore; - let TSFlags{30} = FixedSize; + let TSFlags{22} = EXP; + let TSFlags{23} = WQM; + let TSFlags{24} = VGPRSpill; + let TSFlags{25} = SGPRSpill; + let TSFlags{26} = VOPAsmPrefer32Bit; + let TSFlags{27} = Gather4; + let TSFlags{28} = DisableWQM; + let TSFlags{29} = SOPKZext; + let TSFlags{30} = ScalarStore; + let TSFlags{31} = FixedSize; let SchedRW = [Write32Bit]; @@ -232,6 +234,17 @@ class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> : let hasSideEffects = 0; } +class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> : + InstSI<outs, ins, asm, pattern> { + let EXP = 1; + let EXP_CNT = 1; + let 
mayLoad = 0; // Set to 1 if done bit is set. + let mayStore = 1; + let UseNamedOperandTable = 1; + let Uses = [EXEC]; + let SchedRW = [WriteExport]; +} + } // End Uses = [EXEC] class MIMG <dag outs, dag ins, string asm, list<dag> pattern> : diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 71f1968d250..0f16fa0902f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -372,6 +372,14 @@ public: return get(Opcode).TSFlags & SIInstrFlags::FLAT; } + static bool isEXP(const MachineInstr &MI) { + return MI.getDesc().TSFlags & SIInstrFlags::EXP; + } + + bool isEXP(uint16_t Opcode) const { + return get(Opcode).TSFlags & SIInstrFlags::EXP; + } + static bool isWQM(const MachineInstr &MI) { return MI.getDesc().TSFlags & SIInstrFlags::WQM; } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 0f30d7b4657..39c0821e858 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -518,32 +518,39 @@ class SIMCInstr <string pseudo, int subtarget> { // EXP classes //===----------------------------------------------------------------------===// -class EXPCommon : InstSI< +class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon< (outs), - (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm, - VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3), - "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3", - [] > { - - let EXP_CNT = 1; - let Uses = [EXEC]; - let SchedRW = [WriteExport]; -} - -multiclass EXP_m { - - let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ; - } - - def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe { - let DecoderNamespace="SICI"; - let DisableDecoder = DisableSIDecoder; - } + (ins i8imm:$tgt, VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3, + i1imm:$vm, i1imm:$compr, 
i8imm:$en), + "exp $en, $tgt, $compr, "#!if(done, "1", "0")#", $vm, $src0, $src1, $src2, $src3", + [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr), + f32:$src0, f32:$src1, f32:$src2, f32:$src3)] +>; - def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi { - let DecoderNamespace="VI"; - let DisableDecoder = DisableVIDecoder; +// Split EXP instruction into EXP and EXP_DONE so we can set +// mayLoad for done=1. +multiclass EXP_m<bit done, SDPatternOperator node> { + let mayLoad = done in { + let isPseudo = 1, isCodeGenOnly = 1 in { + def "" : EXP_Helper<done, node>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>; + } + + let done = done in { + def _si : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>, + EXPe { + let DecoderNamespace = "SICI"; + let DisableDecoder = DisableSIDecoder; + } + + def _vi : EXP_Helper<done>, + SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>, + EXPe_vi { + let DecoderNamespace = "VI"; + let DisableDecoder = DisableVIDecoder; + } + } } } diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 0aa8e19c61b..b24bcd76035 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -37,7 +37,8 @@ let SubtargetPredicate = isGCN in { // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m; +defm EXP : EXP_m<0, AMDGPUexport>; +defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; //===----------------------------------------------------------------------===// // VINTRP Instructions @@ -388,13 +389,6 @@ def : Pat < (SI_KILL (i32 0xbf800000)) >; -def : Pat < - (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr, - f32:$src0, f32:$src1, f32:$src2, f32:$src3), - (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm, - $src0, $src1, $src2, $src3) ->; - 
//===----------------------------------------------------------------------===// // VOP1 Patterns //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/llvm/lib/Target/AMDGPU/SIIntrinsics.td index b2857f06cbb..5da37546871 100644 --- a/llvm/lib/Target/AMDGPU/SIIntrinsics.td +++ b/llvm/lib/Target/AMDGPU/SIIntrinsics.td @@ -15,7 +15,20 @@ let TargetPrefix = "SI", isTarget = 1 in { def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; + + def int_SI_export : Intrinsic <[], + [llvm_i32_ty, // en + llvm_i32_ty, // vm (FIXME: should be i1) + llvm_i32_ty, // done (FIXME: should be i1) + llvm_i32_ty, // tgt + llvm_i32_ty, // compr (FIXME: should be i1) + llvm_float_ty, // src0 + llvm_float_ty, // src1 + llvm_float_ty, // src2 + llvm_float_ty], // src3 + [] + >; + def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ; |