diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-11 13:49:19 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-04-11 13:49:19 +0000 |
commit | 89c8a10f7c2e76d230fda7dd1ef0daa632555fd2 (patch) | |
tree | f954f461330af1e45e7dfb51fc9d73a371011afc /llvm/lib | |
parent | 7bcb5720fd38b882f514b428d36fdbbef417ab45 (diff) | |
download | bcm5719-llvm-89c8a10f7c2e76d230fda7dd1ef0daa632555fd2.tar.gz bcm5719-llvm-89c8a10f7c2e76d230fda7dd1ef0daa632555fd2.zip |
[X86] Add variable shuffle schedule classes
Split variable index shuffles from immediate index shuffles by adding four new schedule classes:
WriteFVarShuffle - floating point variable 'in-lane' shuffles (VPERMILPS/VPERMIL2PS etc.)
WriteVarShuffle - vector integer variable 'in-lane' shuffles (PSHUFB/VPPERM etc.)
WriteFVarShuffle256 - floating point variable 'cross-lane' shuffles (VPERMPS etc.)
WriteVarShuffle256 - vector integer variable 'cross-lane' shuffles (VPERMD etc.)
Differential Revision: https://reviews.llvm.org/D45404
llvm-svn: 329806
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrMMX.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 12 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrXOP.td | 16 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 8 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 10 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 55 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 32 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 4 |
13 files changed, 55 insertions, 119 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 67528fbab4a..942aa06e740 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -1725,12 +1725,12 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", //===----------------------------------------------------------------------===// // -- VPERMI2 - 3 source operands form -- -let Sched = WriteFShuffle256 in +let Sched = WriteFVarShuffle256 in def AVX512_PERM2_F : OpndItins< IIC_SSE_SHUFP, IIC_SSE_SHUFP >; -let Sched = WriteShuffle256 in +let Sched = WriteVarShuffle256 in def AVX512_PERM2_I : OpndItins< IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI >; @@ -8969,7 +8969,7 @@ let Predicates = [HasDQI, NoBWI] in { // // FIXME: Is there a better scheduler itinerary for VPCOMPRESS/VPEXPAND? -let Sched = WriteShuffle256 in { +let Sched = WriteVarShuffle256 in { def AVX512_COMPRESS : OpndItins< IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM >; diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 06d30b3e62a..a95fdd4c800 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -74,11 +74,12 @@ def MMX_UNPCK_L_ITINS : OpndItins< def MMX_PCK_ITINS : OpndItins< IIC_MMX_PCK_RR, IIC_MMX_PCK_RM >; +} // Sched +let Sched = WriteVarShuffle in def MMX_PSHUF_ITINS : OpndItins< IIC_MMX_PSHUF, IIC_MMX_PSHUF >; -} // Sched let Sched = WriteCvtF2I in { def MMX_CVT_PD_ITINS : OpndItins< diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 4f0ad7f71b2..361f2a9ad34 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5050,7 +5050,7 @@ def SSE_PHADDSUBW : OpndItins< IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM >; } -let Sched = WriteShuffle in +let Sched = WriteVarShuffle in def SSE_PSHUFB : OpndItins< IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM >; @@ -7688,7 +7688,7 @@ defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, 
"vmaskmovpd", // VPERMIL - Permute Single and Double Floating-Point Values // -let Sched = WriteFShuffle in +let Sched = WriteFVarShuffle in def AVX_VPERMILV : OpndItins< IIC_SSE_SHUFP, IIC_SSE_SHUFP >; @@ -7707,13 +7707,13 @@ multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, (ins RC:$src1, RC:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt RC:$src2))))]>, VEX_4V, - Sched<[WriteFShuffle]>; + Sched<[WriteFVarShuffle]>; def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop_i:$src2), !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (f_vt (X86VPermilpv RC:$src1, (i_vt (bitconvert (i_frag addr:$src2))))))]>, VEX_4V, - Sched<[WriteFShuffleLd, ReadAfterLd]>; + Sched<[WriteFVarShuffleLd, ReadAfterLd]>; def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, u8imm:$src2), @@ -8181,10 +8181,10 @@ multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, } } -defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256, +defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteVarShuffle256, i256mem>; let ExeDomain = SSEPackedSingle in -defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256, +defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFVarShuffle256, f256mem>; multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag, diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index 0aaaeebefa2..ae707aecbef 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -279,7 +279,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 VR128:$src3))))]>, - XOP_4V, Sched<[WriteShuffle]>; + XOP_4V, Sched<[WriteVarShuffle]>; def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs 
VR128:$dst), (ins VR128:$src1, VR128:$src2, i128mem:$src3), !strconcat(OpcodeStr, @@ -287,7 +287,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2), (vt128 (bitconvert (loadv2i64 addr:$src3))))))]>, - XOP_4V, VEX_W, Sched<[WriteShuffleLd, ReadAfterLd, ReadAfterLd]>; + XOP_4V, VEX_W, Sched<[WriteVarShuffleLd, ReadAfterLd, ReadAfterLd]>; def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, VR128:$src3), !strconcat(OpcodeStr, @@ -295,7 +295,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))), (vt128 VR128:$src3))))]>, - XOP_4V, Sched<[WriteShuffleLd, ReadAfterLd, + XOP_4V, Sched<[WriteVarShuffleLd, ReadAfterLd, // 128mem:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, @@ -307,7 +307,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode, (ins VR128:$src1, VR128:$src2, VR128:$src3), !strconcat(OpcodeStr, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), - []>, XOP_4V, VEX_W, Sched<[WriteShuffle]>, FoldGenData<NAME#rrr>; + []>, XOP_4V, VEX_W, Sched<[WriteVarShuffle]>, FoldGenData<NAME#rrr>; } let ExeDomain = SSEPackedInt in { @@ -367,7 +367,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), [(set RC:$dst, (VT (X86vpermil2 RC:$src1, RC:$src2, RC:$src3, (i8 imm:$src4))))]>, - Sched<[WriteFShuffle]>; + Sched<[WriteFVarShuffle]>; def rm : IXOP5<Opc, MRMSrcMemOp4, (outs RC:$dst), (ins RC:$src1, RC:$src2, intmemop:$src3, u8imm:$src4), !strconcat(OpcodeStr, @@ -376,7 +376,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, (VT (X86vpermil2 RC:$src1, RC:$src2, (bitconvert (IntLdFrag addr:$src3)), (i8 imm:$src4))))]>, VEX_W, - Sched<[WriteFShuffleLd, ReadAfterLd, 
ReadAfterLd]>; + Sched<[WriteFVarShuffleLd, ReadAfterLd, ReadAfterLd]>; def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, fpmemop:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, @@ -384,7 +384,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (VT (X86vpermil2 RC:$src1, (FPLdFrag addr:$src2), RC:$src3, (i8 imm:$src4))))]>, - Sched<[WriteFShuffleLd, ReadAfterLd, + Sched<[WriteFVarShuffleLd, ReadAfterLd, // fpmemop:$src2 ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, // RC:$src3 @@ -395,7 +395,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC, (ins RC:$src1, RC:$src2, RC:$src3, u8imm:$src4), !strconcat(OpcodeStr, "\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"), - []>, VEX_W, Sched<[WriteFShuffle]>, FoldGenData<NAME#rr>; + []>, VEX_W, Sched<[WriteFVarShuffle]>, FoldGenData<NAME#rr>; } let ExeDomain = SSEPackedDouble in { diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 1d80920e566..79a25959262 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -162,6 +162,7 @@ defm : BWWriteResPair<WriteFRcp, [BWPort0], 5>; // Floating point reciprocal e defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5>; // Floating point reciprocal square root estimate. defm : BWWriteResPair<WriteFMA, [BWPort01], 5>; // Fused Multiply Add. defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1>; // Floating point vector shuffles. +defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1>; // Floating point vector variable shuffles. defm : BWWriteResPair<WriteFBlend, [BWPort015], 1>; // Floating point vector blends. defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2]>; // Fp vector variable blends. @@ -178,6 +179,7 @@ defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts. defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply. 
defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD defm : BWWriteResPair<WriteShuffle, [BWPort5], 1>; // Vector shuffles. +defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1>; // Vector variable shuffles. defm : BWWriteResPair<WriteBlend, [BWPort15], 1>; // Vector blends. defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2]>; // Vector variable blends. defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 6, [1, 2]>; // Vector MPSAD. @@ -288,7 +290,9 @@ def : WriteRes<WriteSystem, [BWPort0156]> { let Latency = 100; } // def Writ // AVX2. defm : BWWriteResPair<WriteFShuffle256, [BWPort5], 3>; // Fp 256-bit width vector shuffles. +defm : BWWriteResPair<WriteFVarShuffle256, [BWPort5], 3>; // Fp 256-bit width vector variable shuffles. defm : BWWriteResPair<WriteShuffle256, [BWPort5], 3>; // 256-bit width vector shuffles. +defm : BWWriteResPair<WriteVarShuffle256, [BWPort5], 3>; // 256-bit width vector variable shuffles. defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 2, [2, 1]>; // Variable vector shifts. // Old microcoded instructions that nobody use. 
@@ -366,7 +370,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_MOVQ2DQrr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -404,9 +407,7 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -419,7 +420,6 @@ def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -891,9 +891,7 @@ def: InstRW<[BWWriteResGroup28], (instregex "VBROADCASTSDYrr", "VPBROADCASTW(Y?)rr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 29f120f4edb..8022ddad111 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -159,8 +159,10 @@ defm : HWWriteResPair<WriteCvtI2F, [HWPort1], 4>; defm : HWWriteResPair<WriteCvtF2F, [HWPort1], 3>; defm : HWWriteResPair<WriteFMA, [HWPort01], 5>; defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1>; +defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1>; defm : HWWriteResPair<WriteFBlend, [HWPort015], 1>; defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3>; +defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3>; defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2]>; // Vector integer operations. 
@@ -174,8 +176,10 @@ defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>; defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>; defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>; defm : HWWriteResPair<WriteShuffle, [HWPort5], 1>; +defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1>; defm : HWWriteResPair<WriteBlend, [HWPort15], 1>; defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3>; +defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3>; defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2]>; defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 2, [2, 1]>; defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 6, [1, 2]>; @@ -724,7 +728,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_MOVQ2DQrr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -762,9 +765,7 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -777,7 +778,6 @@ def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -1780,9 +1780,7 @@ def: InstRW<[HWWriteResGroup51], (instregex "VBROADCASTSDYrr", "VPBROADCASTWrr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 5a2121f0770..aefbfb64cf8 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -148,6 +148,7 @@ defm : SBWriteResPair<WriteCvtF2I, [SBPort1], 3>; defm : SBWriteResPair<WriteCvtI2F, [SBPort1], 4>; defm : SBWriteResPair<WriteCvtF2F, [SBPort1], 3>; defm : SBWriteResPair<WriteFShuffle, 
[SBPort5], 1>; +defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1>; defm : SBWriteResPair<WriteFBlend, [SBPort05], 1>; defm : SBWriteResPair<WriteFVarBlend, [SBPort0, SBPort5], 2>; @@ -162,6 +163,7 @@ defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>; defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>; defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model defm : SBWriteResPair<WriteShuffle, [SBPort5], 1>; +defm : SBWriteResPair<WriteVarShuffle, [SBPort15], 1>; defm : SBWriteResPair<WriteBlend, [SBPort15], 1>; defm : SBWriteResPair<WriteVarBlend, [SBPort1, SBPort5], 2>; defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 5, [1,2], 3>; @@ -275,7 +277,9 @@ def : WriteRes<WriteNop, []>; // AVX2/FMA is not supported on that architecture, but we should define the basic // scheduling resources anyway. defm : SBWriteResPair<WriteFShuffle256, [SBPort0], 1>; +defm : SBWriteResPair<WriteFVarShuffle256, [SBPort0], 1>; defm : SBWriteResPair<WriteShuffle256, [SBPort0], 1>; +defm : SBWriteResPair<WriteVarShuffle256, [SBPort0], 1>; defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1>; defm : SBWriteResPair<WriteFMA, [SBPort01], 5>; @@ -352,9 +356,7 @@ def: InstRW<[SBWriteResGroup2], (instregex "FDECSTP", "(V?)ORPS(Y?)rr", "VPERM2F128rr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)SHUFPD(Y?)rri", "(V?)SHUFPS(Y?)rri", "(V?)UNPCKHPD(Y?)rr", @@ -408,7 +410,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr", "MMX_PABSWrr", "MMX_PADDQirr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSIGNBrr", "MMX_PSIGNDrr", "MMX_PSIGNWrr", @@ -462,7 +463,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABSBrr", "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFBrr", "(V?)PSHUFDri", "(V?)PSHUFHWri", "(V?)PSHUFLWri", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 59e05df9904..6511206992d 
100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -159,6 +159,7 @@ defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 5>; // Floating point reciprocal defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 5>; // Floating point reciprocal square root estimate. defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4>; // Fused Multiply Add. defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1>; // Floating point vector shuffles. +defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1>; // Floating point vector shuffles. defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1>; // Floating point vector blends. defm : SKLWriteResPair<WriteFVarBlend, [SKLPort5], 2, [2]>; // Fp vector variable blends. @@ -175,6 +176,7 @@ defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts. defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply. defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1>; // Vector shuffles. +defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1>; // Vector shuffles. defm : SKLWriteResPair<WriteBlend, [SKLPort15], 1>; // Vector blends. defm : SKLWriteResPair<WriteVarBlend, [SKLPort5], 2, [2]>; // Vector variable blends. defm : SKLWriteResPair<WriteMPSAD, [SKLPort0, SKLPort5], 6, [1, 2]>; // Vector MPSAD. @@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKLPort0156]> { let Latency = 100; } // def Wri // AVX2. defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles. +defm : SKLWriteResPair<WriteFVarShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector variable shuffles. defm : SKLWriteResPair<WriteShuffle256, [SKLPort5], 3>; // 256-bit width vector shuffles. +defm : SKLWriteResPair<WriteVarShuffle256, [SKLPort5], 3>; // 256-bit width vector variable shuffles. defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts. 
// Old microcoded instructions that nobody use. @@ -367,7 +371,6 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r", "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -397,9 +400,7 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r", "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPD(Y?)ri", - "VPERMILPD(Y?)rr", "VPERMILPS(Y?)ri", - "VPERMILPS(Y?)rr", "(V?)PMOVSXBDrr", "(V?)PMOVSXBQrr", "(V?)PMOVSXBWrr", @@ -412,7 +413,6 @@ def: InstRW<[SKLWriteResGroup3], (instregex "COMP_FST0r", "(V?)PMOVZXDQrr", "(V?)PMOVZXWDrr", "(V?)PMOVZXWQrr", - "(V?)PSHUFB(Y?)rr", "(V?)PSHUFD(Y?)ri", "(V?)PSHUFHW(Y?)ri", "(V?)PSHUFLW(Y?)ri", @@ -884,9 +884,7 @@ def: InstRW<[SKLWriteResGroup30], (instregex "ADD_FPrST0", "(V?)PCMPGTQ(Y?)rr", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", "VPERMPDYri", - "VPERMPSYrr", "VPERMQYri", "VPMOVSXBDYrr", "VPMOVSXBQYrr", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index c7c7e2ab0b8..5a79f240273 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -159,6 +159,7 @@ defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 5>; // Floating point reciprocal defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 5>; // Floating point reciprocal square root estimate. defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4>; // Fused Multiply Add. defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1>; // Floating point vector shuffles. +defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1>; // Floating point vector variable shuffles. defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1>; // Floating point vector blends. defm : SKXWriteResPair<WriteFVarBlend, [SKXPort5], 2, [2]>; // Fp vector variable blends. @@ -175,6 +176,7 @@ defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts. 
defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply. defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply. defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1>; // Vector shuffles. +defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1>; // Vector variable shuffles. defm : SKXWriteResPair<WriteBlend, [SKXPort15], 1>; // Vector blends. defm : SKXWriteResPair<WriteVarBlend, [SKXPort5], 2, [2]>; // Vector variable blends. defm : SKXWriteResPair<WriteMPSAD, [SKXPort0, SKXPort5], 6, [1, 2]>; // Vector MPSAD. @@ -294,7 +296,9 @@ def : WriteRes<WriteSystem, [SKXPort0156]> { let Latency = 100; } // def Wri // AVX2. defm : SKXWriteResPair<WriteFShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector shuffles. +defm : SKXWriteResPair<WriteFVarShuffle256, [SKXPort5], 3>; // Fp 256-bit width vector variable shuffles. defm : SKXWriteResPair<WriteShuffle256, [SKXPort5], 3>; // 256-bit width vector shuffles. +defm : SKXWriteResPair<WriteVarShuffle256, [SKXPort5], 3>; // 256-bit width vector variable shuffles. defm : SKXWriteResPair<WriteVarVecShift, [SKXPort0, SKXPort5], 2, [2, 1]>; // Variable vector shifts. // Old microcoded instructions that nobody use. 
@@ -412,7 +416,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r", "MMX_MOVD64rr", "MMX_MOVD64to64rr", "MMX_PALIGNRrri", - "MMX_PSHUFBrr", "MMX_PSHUFWri", "MMX_PUNPCKHBWirr", "MMX_PUNPCKHDQirr", @@ -447,7 +450,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r", "PMOVZXDQrr", "PMOVZXWDrr", "PMOVZXWQrr", - "PSHUFBrr", "PSHUFDri", "PSHUFHWri", "PSHUFLWri", @@ -530,25 +532,15 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r", "VPBROADCASTDrr", "VPBROADCASTQrr", "VPERMILPDYri", - "VPERMILPDYrr", "VPERMILPDZ128ri", - "VPERMILPDZ128rr", "VPERMILPDZ256ri", - "VPERMILPDZ256rr", "VPERMILPDZri", - "VPERMILPDZrr", "VPERMILPDri", - "VPERMILPDrr", "VPERMILPSYri", - "VPERMILPSYrr", "VPERMILPSZ128ri", - "VPERMILPSZ128rr", "VPERMILPSZ256ri", - "VPERMILPSZ256rr", "VPERMILPSZri", - "VPERMILPSZrr", "VPERMILPSri", - "VPERMILPSrr", "VPMOVSXBDrr", "VPMOVSXBQrr", "VPMOVSXBWrr", @@ -561,11 +553,6 @@ def: InstRW<[SKXWriteResGroup3], (instregex "COMP_FST0r", "VPMOVZXDQrr", "VPMOVZXWDrr", "VPMOVZXWQrr", - "VPSHUFBYrr", - "VPSHUFBZ128rr", - "VPSHUFBZ256rr", - "VPSHUFBZrr", - "VPSHUFBrr", "VPSHUFDYri", "VPSHUFDZ128ri", "VPSHUFDZ256ri", @@ -1859,46 +1846,12 @@ def: InstRW<[SKXWriteResGroup32], (instregex "ADD_FPrST0", "VPCMPWZrri", "VPERM2F128rr", "VPERM2I128rr", - "VPERMDYrr", - "VPERMDZ256rr", - "VPERMDZrr", - "VPERMI2D128rr", - "VPERMI2D256rr", - "VPERMI2Drr", - "VPERMI2PD128rr", - "VPERMI2PD256rr", - "VPERMI2PDrr", - "VPERMI2PS128rr", - "VPERMI2PS256rr", - "VPERMI2PSrr", - "VPERMI2Q128rr", - "VPERMI2Q256rr", - "VPERMI2Qrr", "VPERMPDYri", "VPERMPDZ256ri", - "VPERMPDZ256rr", "VPERMPDZri", - "VPERMPDZrr", - "VPERMPSYrr", - "VPERMPSZ256rr", - "VPERMPSZrr", "VPERMQYri", "VPERMQZ256ri", - "VPERMQZ256rr", "VPERMQZri", - "VPERMQZrr", - "VPERMT2D128rr", - "VPERMT2D256rr", - "VPERMT2Drr", - "VPERMT2PD128rr", - "VPERMT2PD256rr", - "VPERMT2PDrr", - "VPERMT2PS128rr", - "VPERMT2PS256rr", - "VPERMT2PSrr", - "VPERMT2Q128rr", - "VPERMT2Q256rr", - "VPERMT2Qrr", "VPMAXSQZ128rr", 
"VPMAXSQZ256rr", "VPMAXSQZrr", diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 7f381a891ad..0dc5f7288d5 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -87,6 +87,7 @@ defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. +defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. @@ -106,6 +107,7 @@ defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WritePMULLD : X86SchedWritePair; // PMULLD defm WriteShuffle : X86SchedWritePair; // Vector shuffles. +defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. defm WriteBlend : X86SchedWritePair; // Vector blends. defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. @@ -150,7 +152,9 @@ def WriteSystem : SchedWrite; // AVX2. defm WriteFShuffle256 : X86SchedWritePair; // Fp 256-bit width vector shuffles. +defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuffles. defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. +defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. // Old microcoded instructions that nobody use. 
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index f9dfefbc915..2ab593af326 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -301,9 +301,11 @@ defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>; defm : JWriteResFpuPair<WriteFSqrt, [JFPU1, JFPM], 21, [1, 21]>; defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>; +defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>; defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>; +defm : JWriteResFpuPair<WriteFVarShuffle256, [JFPU01, JFPX], 1>; // NOTE: Doesn't exist on Jaguar. //////////////////////////////////////////////////////////////////////////////// // Conversions. @@ -367,10 +369,12 @@ defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>; defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>; defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. 
//////////////////////////////////////////////////////////////////////////////// @@ -750,34 +754,6 @@ def JWriteVCVTPDYLd: SchedWriteRes<[JLAGU, JFPU1, JSTC, JFPX]> { } def : InstRW<[JWriteVCVTPDYLd, ReadAfterLd], (instrs VCVTPD2DQYrm, VCVTTPD2DQYrm, VCVTPD2PSYrm)>; -def JWritePSHUFB: SchedWriteRes<[JFPU01, JVALU]> { - let Latency = 2; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWritePSHUFB], (instrs MMX_PSHUFBrr, PSHUFBrr, VPSHUFBrr)>; - -def JWritePSHUFBLd: SchedWriteRes<[JLAGU, JFPU01, JVALU]> { - let Latency = 7; - let ResourceCycles = [1, 1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWritePSHUFBLd, ReadAfterLd], (instrs MMX_PSHUFBrm, PSHUFBrm, VPSHUFBrm)>; - -def JWriteVPERM: SchedWriteRes<[JFPU01, JFPX]> { - let Latency = 2; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteVPERM], (instrs VPERMILPDrr, VPERMILPSrr)>; - -def JWriteVPERMLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { - let Latency = 7; - let ResourceCycles = [1, 1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteVPERMLd, ReadAfterLd], (instrs VPERMILPDrm, VPERMILPSrm)>; - def JWriteVPERMY: SchedWriteRes<[JFPU01, JFPX]> { let Latency = 3; let ResourceCycles = [2, 6]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index a712a188aa0..8147c94a2e7 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -134,6 +134,7 @@ defm : SLMWriteResPair<WriteCvtF2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2F, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtF2F, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>; // Vector integer operations. 
@@ -149,6 +150,7 @@ defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>; //defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>; defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>; @@ -255,7 +257,9 @@ def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>; defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFShuffle256, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteFVarShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffle256, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteVarShuffle256, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFMA, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 3018e0f20ba..a88c680a7e1 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -201,6 +201,7 @@ defm : ZnWriteResFpuPair<WriteCvtF2F, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtF2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>; defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>; +defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU0], 5>; defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>; defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>; @@ -219,8 +220,10 @@ defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>; defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4>; // FIXME defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>; defm : 
ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>; +defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>; // Vector Shift Operations defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; @@ -241,6 +244,7 @@ def : WriteRes<WriteNop, []>; // Following instructions with latency=100 are microcoded. // We set long latency so as to block the entire pipeline. defm : ZnWriteResFpuPair<WriteFShuffle256, [ZnFPU], 100>; +defm : ZnWriteResFpuPair<WriteFVarShuffle256, [ZnFPU], 100>; //Microcoded Instructions let Latency = 100 in { |