diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-03-26 21:06:14 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-03-26 21:06:14 +0000 |
| commit | 28e7bcbba6eed70ad026b28c3d09872275c27812 (patch) | |
| tree | be9c3763409e3f3f5ca07cc6af9c42077b4e547c /llvm/lib/Target | |
| parent | 78fdca3cd55cbb28924542e9a7102a088bb4566c (diff) | |
| download | bcm5719-llvm-28e7bcbba6eed70ad026b28c3d09872275c27812.tar.gz bcm5719-llvm-28e7bcbba6eed70ad026b28c3d09872275c27812.zip | |
[X86] Add WriteCRC32 scheduler class
Currently, CRC32 instructions use the WriteFAdd class. This patch splits them off into their own WriteCRC32 class; at the moment it is still mostly just a duplicate of WriteFAdd, but it can now be tweaked on a target-by-target basis.
Differential Revision: https://reviews.llvm.org/D44647
llvm-svn: 328582
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 4 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 1 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 |
10 files changed, 15 insertions, 24 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ebaaaebeb11..5ad23994152 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -7074,14 +7074,14 @@ class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2), !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>, - Sched<[WriteFAdd]>; + Sched<[WriteCRC32]>; class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, X86MemOperand x86memop, SDPatternOperator Int> : SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))], - IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>; + IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>; let Constraints = "$src1 = $dst" in { def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 6ce52328451..71eb873c5ac 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [BWPort4]>; defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op. defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication. defm : BWWriteResPair<WriteIDiv, [BWPort0, BWDivider], 25, [1, 10]>; +defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads. 
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index eea8c42db5a..e2ea697b47f 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -115,6 +115,7 @@ defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } defm : HWWriteResPair<WriteShift, [HWPort06], 1>; defm : HWWriteResPair<WriteJump, [HWPort06], 1>; +defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 84b40cfa527..31478380ebf 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -106,6 +106,7 @@ def : WriteRes<WriteIMulH, []> { let Latency = 3; } defm : SBWriteResPair<WriteShift, [SBPort05], 1>; defm : SBWriteResPair<WriteJump, [SBPort5], 1>; +defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>; // This is for simple LEAs with one or two input operands. 
// The complex ones can only execute on port 1, and they require two cycles on @@ -678,8 +679,6 @@ def: InstRW<[SBWriteResGroup21], (instrs MUL8r, IMUL16rr, IMUL32rr, IMUL32rri, I def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", - "CRC32r(16|32|64)r8", - "CRC32r(16|32|64)r64", "MMX_CVTPI2PSirr", "MMX_CVTPS2PIirr", "MMX_CVTTPS2PIirr", @@ -1416,9 +1415,7 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64", - "CRC32r(16|32|64)m8", - "FCOM32m", +def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m", "FCOM64m", "FCOMP32m", "FCOMP64m")>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 78aec721d37..2a16982346c 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKLPort4]>; defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op. defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication. defm : SKLWriteResPair<WriteIDiv, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; // Integer division. +defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 8b4a394bc3f..c9a9c60ffc0 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKXPort4]>; defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op. defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication. 
defm : SKXWriteResPair<WriteIDiv, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; // Integer division. +defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads. diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index b994a57e27a..85ca7a9b0ad 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -110,6 +110,9 @@ defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float. defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion. +// CRC32 instruction. +defm WriteCRC32 : X86SchedWritePair; + // Strings instructions. // Packed Compare Implicit Length Strings, Return Mask defm WritePCmpIStrM : X86SchedWritePair; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 502c569f354..f3c18492d89 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -124,6 +124,7 @@ def : WriteRes<WriteRMW, [JSAGU]>; defm : JWriteResIntPair<WriteALU, [JALU01], 1>; defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division) +defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>; def : WriteRes<WriteIMulH, [JALU1]> { let Latency = 6; @@ -190,22 +191,6 @@ def JWriteIDiv32Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> { def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>; def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>; -def JWriteCRC32 : SchedWriteRes<[JALU01]> { - let Latency = 3; - let ResourceCycles = [4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32], (instrs CRC32r32r8, CRC32r32r16, CRC32r32r32, - CRC32r64r8, CRC32r64r64)>; - -def JWriteCRC32Ld : 
SchedWriteRes<[JLAGU, JALU01]> { - let Latency = 6; - let ResourceCycles = [1, 4]; - let NumMicroOps = 3; -} -def : InstRW<[JWriteCRC32Ld], (instrs CRC32r32m8, CRC32r32m16, CRC32r32m32, - CRC32r64m8, CRC32r64m64)>; - //////////////////////////////////////////////////////////////////////////////// // Integer shifts and rotates. //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 94eb5374a69..6fb7e9480c9 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -90,7 +90,8 @@ def : InstRW<[WriteMove], (instrs COPY)>; defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>; defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>; defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>; -defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>; +defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>; +defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>; // This is for simple LEAs with one or two input operands. // The complex ones can only execute on port 1, and they require two cycles on diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index bb5a851b64d..5459485b398 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -151,6 +151,7 @@ defm : ZnWriteResPair<WriteALU, [ZnALU], 1>; defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>; defm : ZnWriteResPair<WriteShift, [ZnALU], 1>; defm : ZnWriteResPair<WriteJump, [ZnALU], 1>; +defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>; // Bit counts. defm : ZnWriteResPair<WriteBitScan, [ZnALU], 3>; |

