diff options
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 19 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 16 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 18 |
10 files changed, 55 insertions, 64 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 5eaf55b09f2..1dd464c1c3b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -2378,30 +2378,35 @@ let Predicates = [HasBMI] in { multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC, X86MemOperand x86memop, Intrinsic Int, - PatFrag ld_frag> { + PatFrag ld_frag, X86FoldableSchedWrite Sched> { def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)], IIC_BIN_NONMEM>, - T8PS, VEX, Sched<[WriteALU]>; + T8PS, VEX, Sched<[Sched]>; def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)), (implicit EFLAGS)], IIC_BIN_MEM>, T8PS, VEX, - Sched<[WriteALULd, ReadAfterLd]>; + Sched<[Sched.Folded, + // x86memop:$src1 + ReadDefault, ReadDefault, ReadDefault, ReadDefault, + ReadDefault, + // RC:$src2 + ReadAfterLd]>; } let Predicates = [HasBMI], Defs = [EFLAGS] in { defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem, - int_x86_bmi_bextr_32, loadi32>; + int_x86_bmi_bextr_32, loadi32, WriteBEXTR>; defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem, - int_x86_bmi_bextr_64, loadi64>, VEX_W; + int_x86_bmi_bextr_64, loadi64, WriteBEXTR>, VEX_W; } let Predicates = [HasBMI2], Defs = [EFLAGS] in { defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem, - int_x86_bmi_bzhi_32, loadi32>; + int_x86_bmi_bzhi_32, loadi32, WriteBZHI>; defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem, - int_x86_bmi_bzhi_64, loadi64>, VEX_W; + int_x86_bmi_bzhi_64, loadi64, WriteBZHI>, VEX_W; } def CountTrailingOnes : SDNodeXForm<imm, [{ diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 
db7ae8a6fa6..2c264e391a1 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -120,6 +120,10 @@ defm : BWWriteResPair<WritePOPCNT, [BWPort1], 3>; // Integer shifts and rotates. defm : BWWriteResPair<WriteShift, [BWPort06], 1>; +// BMI1 BEXTR, BMI2 BZHI +defm : BWWriteResPair<WriteBEXTR, [BWPort06,BWPort15], 2, [1,1], 2>; +defm : BWWriteResPair<WriteBZHI, [BWPort15], 1>; + // Loads, stores, and moves, not folded with other operations. def : WriteRes<WriteLoad, [BWPort23]> { let Latency = 5; } def : WriteRes<WriteStore, [BWPort237, BWPort4]>; @@ -492,7 +496,6 @@ def: InstRW<[BWWriteResGroup7], (instregex "ANDN(32|64)rr", "BLSI(32|64)rr", "BLSMSK(32|64)rr", "BLSR(32|64)rr", - "BZHI(32|64)rr", "LEA(16|32|64)(_32)?r", "MMX_PABSBrr", "MMX_PABSDrr", @@ -780,8 +783,7 @@ def BWWriteResGroup19 : SchedWriteRes<[BWPort06,BWPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup19], (instregex "BEXTR(32|64)rr", - "BSWAP(16|32|64)r")>; +def: InstRW<[BWWriteResGroup19], (instregex "BSWAP(16|32|64)r")>; def BWWriteResGroup20 : SchedWriteRes<[BWPort06,BWPort0156]> { let Latency = 2; @@ -1442,7 +1444,6 @@ def: InstRW<[BWWriteResGroup64], (instregex "ANDN(32|64)rm", "BLSI(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", - "BZHI(32|64)rm", "MMX_PABSBrm", "MMX_PABSDrm", "MMX_PABSWrm", @@ -1833,13 +1834,6 @@ def BWWriteResGroup84 : SchedWriteRes<[BWPort6,BWPort23,BWPort0156]> { def: InstRW<[BWWriteResGroup84], (instregex "LRETQ", "RETQ")>; -def BWWriteResGroup85 : SchedWriteRes<[BWPort23,BWPort06,BWPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup85], (instregex "BEXTR(32|64)rm")>; - def BWWriteResGroup86 : SchedWriteRes<[BWPort23,BWPort06,BWPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 97825257cb7..bd16dc6d530 100644 --- 
a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -128,6 +128,10 @@ defm : HWWriteResPair<WriteLZCNT, [HWPort1], 3>; defm : HWWriteResPair<WriteTZCNT, [HWPort1], 3>; defm : HWWriteResPair<WritePOPCNT, [HWPort1], 3>; +// BMI1 BEXTR, BMI2 BZHI +defm : HWWriteResPair<WriteBEXTR, [HWPort06,HWPort15], 2, [1,1], 2>; +defm : HWWriteResPair<WriteBZHI, [HWPort15], 1>; + // This is quite rough, latency depends on the dividend. defm : HWWriteResPair<WriteIDiv, [HWPort0, HWDivider], 25, [1,10], 1, 4>; // Scalar and vector floating point. @@ -844,7 +848,6 @@ def: InstRW<[HWWriteResGroup8], (instregex "ANDN(32|64)rr", "BLSI(32|64)rr", "BLSMSK(32|64)rr", "BLSR(32|64)rr", - "BZHI(32|64)rr", "LEA(16|32|64)(_32)?r", "MMX_PABSBrr", "MMX_PABSDrr", @@ -1230,7 +1233,6 @@ def: InstRW<[HWWriteResGroup16], (instregex "ANDN(32|64)rm", "BLSI(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", - "BZHI(32|64)rm", "MMX_PABSBrm", "MMX_PABSDrm", "MMX_PABSWrm", @@ -1606,8 +1608,7 @@ def HWWriteResGroup34 : SchedWriteRes<[HWPort06,HWPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup34], (instregex "BEXTR(32|64)rr", - "BSWAP(16|32|64)r")>; +def: InstRW<[HWWriteResGroup34], (instregex "BSWAP(16|32|64)r")>; def HWWriteResGroup35 : SchedWriteRes<[HWPort06,HWPort0156]> { let Latency = 2; @@ -1711,13 +1712,6 @@ def: InstRW<[HWWriteResGroup41], (instregex "LRETQ", "RETL", "RETQ")>; -def HWWriteResGroup42 : SchedWriteRes<[HWPort23,HWPort06,HWPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup42], (instregex "BEXTR(32|64)rm")>; - def HWWriteResGroup43 : SchedWriteRes<[HWPort23,HWPort06,HWPort0156]> { let Latency = 7; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index e5fc16844bf..7316de6af72 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ 
b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -119,6 +119,11 @@ defm : SBWriteResPair<WriteLZCNT, [SBPort1], 3, [1], 1, 5>; defm : SBWriteResPair<WriteTZCNT, [SBPort1], 3, [1], 1, 5>; defm : SBWriteResPair<WritePOPCNT, [SBPort1], 3, [1], 1, 5>; +// BMI1 BEXTR, BMI2 BZHI +// NOTE: These don't exist on Sandy Bridge. Ports are guesses. +defm : SBWriteResPair<WriteBEXTR, [SBPort05,SBPort1], 2, [1,1], 2>; +defm : SBWriteResPair<WriteBZHI, [SBPort1], 1>; + // Scalar and vector floating point. def : WriteRes<WriteFStore, [SBPort23, SBPort4]>; def : WriteRes<WriteFLoad, [SBPort23]> { let Latency = 6; } diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index bceb43541d6..2a6658e31ae 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -120,6 +120,10 @@ defm : SKLWriteResPair<WritePOPCNT, [SKLPort1], 3>; // Integer shifts and rotates. defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>; +// BMI1 BEXTR, BMI2 BZHI +defm : SKLWriteResPair<WriteBEXTR, [SKLPort06,SKLPort15], 2, [1,1], 2>; +defm : SKLWriteResPair<WriteBZHI, [SKLPort15], 1>; + // Loads, stores, and moves, not folded with other operations. 
def : WriteRes<WriteLoad, [SKLPort23]> { let Latency = 5; } def : WriteRes<WriteStore, [SKLPort237, SKLPort4]>; @@ -558,7 +562,6 @@ def: InstRW<[SKLWriteResGroup8], (instregex "ANDN(32|64)rr", "BLSI(32|64)rr", "BLSMSK(32|64)rr", "BLSR(32|64)rr", - "BZHI(32|64)rr", "LEA(16|32|64)(_32)?r")>; def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { @@ -802,8 +805,7 @@ def SKLWriteResGroup22 : SchedWriteRes<[SKLPort06,SKLPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup22], (instregex "BEXTR(32|64)rr", - "BSWAP(16|32|64)r")>; +def: InstRW<[SKLWriteResGroup22], (instregex "BSWAP(16|32|64)r")>; def SKLWriteResGroup23 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 2; @@ -1464,7 +1466,6 @@ def: InstRW<[SKLWriteResGroup75], (instregex "ANDN(32|64)rm", "BLSI(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", - "BZHI(32|64)rm", "MOVBE(16|32|64)rm")>; def SKLWriteResGroup76 : SchedWriteRes<[SKLPort23,SKLPort0156]> { @@ -1806,13 +1807,6 @@ def SKLWriteResGroup98 : SchedWriteRes<[SKLPort6,SKLPort23,SKLPort0156]> { def: InstRW<[SKLWriteResGroup98], (instregex "LRETQ", "RETQ")>; -def SKLWriteResGroup99 : SchedWriteRes<[SKLPort23,SKLPort06,SKLPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup99], (instregex "BEXTR(32|64)rm")>; - def SKLWriteResGroup100 : SchedWriteRes<[SKLPort4,SKLPort23,SKLPort237,SKLPort06]> { let Latency = 7; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index d563ea2ebdd..7f336fde980 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -120,6 +120,10 @@ defm : SKXWriteResPair<WriteLZCNT, [SKXPort1], 3>; defm : SKXWriteResPair<WriteTZCNT, [SKXPort1], 3>; defm : SKXWriteResPair<WritePOPCNT, [SKXPort1], 3>; +// BMI1 BEXTR, BMI2 BZHI +defm : SKXWriteResPair<WriteBEXTR, [SKXPort06,SKXPort15], 2, [1,1], 2>; +defm : 
SKXWriteResPair<WriteBZHI, [SKXPort15], 1>; + // Loads, stores, and moves, not folded with other operations. def : WriteRes<WriteLoad, [SKXPort23]> { let Latency = 5; } def : WriteRes<WriteStore, [SKXPort237, SKXPort4]>; @@ -1034,7 +1038,6 @@ def: InstRW<[SKXWriteResGroup8], (instregex "ANDN(32|64)rr", "BLSI(32|64)rr", "BLSMSK(32|64)rr", "BLSR(32|64)rr", - "BZHI(32|64)rr", "LEA(16|32|64)(_32)?r")>; def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { @@ -1597,8 +1600,7 @@ def SKXWriteResGroup22 : SchedWriteRes<[SKXPort06,SKXPort15]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup22], (instregex "BEXTR(32|64)rr", - "BSWAP(16|32|64)r")>; +def: InstRW<[SKXWriteResGroup22], (instregex "BSWAP(16|32|64)r")>; def SKXWriteResGroup23 : SchedWriteRes<[SKXPort06,SKXPort0156]> { let Latency = 2; @@ -3094,7 +3096,6 @@ def: InstRW<[SKXWriteResGroup79], (instregex "ANDN(32|64)rm", "BLSI(32|64)rm", "BLSMSK(32|64)rm", "BLSR(32|64)rm", - "BZHI(32|64)rm", "MOVBE(16|32|64)rm")>; def SKXWriteResGroup80 : SchedWriteRes<[SKXPort23,SKXPort015]> { @@ -3753,13 +3754,6 @@ def SKXWriteResGroup104 : SchedWriteRes<[SKXPort6,SKXPort23,SKXPort0156]> { def: InstRW<[SKXWriteResGroup104], (instregex "LRETQ", "RETQ")>; -def SKXWriteResGroup105 : SchedWriteRes<[SKXPort23,SKXPort06,SKXPort15]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup105], (instregex "BEXTR(32|64)rm")>; - def SKXWriteResGroup106 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 7; let NumMicroOps = 4; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 6136d96fcfb..b5cb26cee8e 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -54,6 +54,10 @@ defm WriteTZCNT : X86SchedWritePair; // Trailing zero count. // Integer shifts and rotates. 
defm WriteShift : X86SchedWritePair; +// BMI1 BEXTR, BMI2 BZHI +defm WriteBEXTR : X86SchedWritePair; +defm WriteBZHI : X86SchedWritePair; + // Loads, stores, and moves, not folded with other operations. def WriteLoad : SchedWrite; def WriteStore : SchedWrite; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d61d27267f7..2994b31fe66 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -141,6 +141,10 @@ defm : JWriteResIntPair<WritePOPCNT, [JALU01], 1>; defm : JWriteResIntPair<WriteLZCNT, [JALU01], 1>; defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>; +// BMI1 BEXTR, BMI2 BZHI +defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>; +defm : JWriteResIntPair<WriteBZHI, [JALU01], 1>; // NOTE: Doesn't exist on Jaguar. + def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> { let Latency = 6; let ResourceCycles = [1, 4]; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 87b1bf26c6e..64a2ec1a103 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -104,6 +104,11 @@ defm : SLMWriteResPair<WriteLZCNT, [SLM_IEC_RSV0], 3>; defm : SLMWriteResPair<WriteTZCNT, [SLM_IEC_RSV0], 3>; defm : SLMWriteResPair<WritePOPCNT, [SLM_IEC_RSV0], 3>; +// BMI1 BEXTR, BMI2 BZHI +// NOTE: These don't exist on Silvermont. Ports are guesses. +defm : SLMWriteResPair<WriteBEXTR, [SLM_IEC_RSV0], 1>; +defm : SLMWriteResPair<WriteBZHI, [SLM_IEC_RSV0], 1>; + // This is quite rough, latency depends on the dividend. defm : SLMWriteResPair<WriteIDiv, [SLM_IEC_RSV01, SLMDivider], 25, [1,25], 1, 4>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 252243c6a7a..33472c8252c 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -162,6 +162,10 @@ defm : ZnWriteResPair<WritePOPCNT, [ZnALU], 1>; // Treat misc copies as a move. 
def : InstRW<[WriteMove], (instrs COPY)>; +// BMI1 BEXTR, BMI2 BZHI +defm : ZnWriteResPair<WriteBEXTR, [ZnALU], 1>; +defm : ZnWriteResPair<WriteBZHI, [ZnALU], 1>; + // IDIV def : WriteRes<WriteIDiv, [ZnALU2, ZnDivider]> { let Latency = 41; @@ -564,25 +568,13 @@ def : InstRW<[ZnWriteALULat2], (instregex "BLS(I|MSK|R)(32|64)rr")>; // r,m. def : InstRW<[ZnWriteALULat2Ld, ReadAfterLd], (instregex "BLS(I|MSK|R)(32|64)rm")>; -// BEXTR. -// r,r,r. -def : InstRW<[WriteALU], (instregex "BEXTR(32|64)rr")>; -// r,m,r. -def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BEXTR(32|64)rm")>; - -// BZHI. -// r,r,r. -def : InstRW<[WriteALU], (instregex "BZHI(32|64)rr")>; -// r,m,r. -def : InstRW<[WriteALULd, ReadAfterLd], (instregex "BZHI(32|64)rm")>; - // CLD STD. def : InstRW<[WriteALU], (instregex "STD", "CLD")>; // PDEP PEXT. // r,r,r. def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>; -// r,m,r. +// r,r,m. def : InstRW<[WriteMicrocoded], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; // ROR ROL. |

