summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2018-03-26 21:06:14 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2018-03-26 21:06:14 +0000
commit 28e7bcbba6eed70ad026b28c3d09872275c27812 (patch)
tree be9c3763409e3f3f5ca07cc6af9c42077b4e547c /llvm/lib/Target
parent 78fdca3cd55cbb28924542e9a7102a088bb4566c (diff)
download bcm5719-llvm-28e7bcbba6eed70ad026b28c3d09872275c27812.tar.gz
bcm5719-llvm-28e7bcbba6eed70ad026b28c3d09872275c27812.zip
[X86] Add WriteCRC32 scheduler class
Currently, CRC32 instructions use the WriteFAdd scheduler class. This patch splits them off into their own WriteCRC32 class. At the moment it is still mostly a duplicate of WriteFAdd, but it can now be tweaked on a target-by-target basis.

Differential Revision: https://reviews.llvm.org/D44647

llvm-svn: 328582
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td 4
-rwxr-xr-x llvm/lib/Target/X86/X86SchedBroadwell.td 1
-rw-r--r-- llvm/lib/Target/X86/X86SchedHaswell.td 1
-rw-r--r-- llvm/lib/Target/X86/X86SchedSandyBridge.td 7
-rw-r--r-- llvm/lib/Target/X86/X86SchedSkylakeClient.td 1
-rwxr-xr-x llvm/lib/Target/X86/X86SchedSkylakeServer.td 1
-rw-r--r-- llvm/lib/Target/X86/X86Schedule.td 3
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBtVer2.td 17
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleSLM.td 3
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleZnver1.td 1
10 files changed, 15 insertions, 24 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index ebaaaebeb11..5ad23994152 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -7074,14 +7074,14 @@ class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut,
SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>,
- Sched<[WriteFAdd]>;
+ Sched<[WriteCRC32]>;
class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut,
X86MemOperand x86memop, SDPatternOperator Int> :
SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2),
!strconcat(asm, "\t{$src2, $src1|$src1, $src2}"),
[(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))],
- IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>;
+ IIC_CRC32_MEM>, Sched<[WriteCRC32Ld, ReadAfterLd]>;
let Constraints = "$src1 = $dst" in {
def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem,
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td
index 6ce52328451..71eb873c5ac 100755
--- a/llvm/lib/Target/X86/X86SchedBroadwell.td
+++ b/llvm/lib/Target/X86/X86SchedBroadwell.td
@@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [BWPort4]>;
defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op.
defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication.
defm : BWWriteResPair<WriteIDiv, [BWPort0, BWDivider], 25, [1, 10]>;
+defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads.
diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td
index eea8c42db5a..e2ea697b47f 100644
--- a/llvm/lib/Target/X86/X86SchedHaswell.td
+++ b/llvm/lib/Target/X86/X86SchedHaswell.td
@@ -115,6 +115,7 @@ defm : HWWriteResPair<WriteIMul, [HWPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : HWWriteResPair<WriteShift, [HWPort06], 1>;
defm : HWWriteResPair<WriteJump, [HWPort06], 1>;
+defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td
index 84b40cfa527..31478380ebf 100644
--- a/llvm/lib/Target/X86/X86SchedSandyBridge.td
+++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td
@@ -106,6 +106,7 @@ def : WriteRes<WriteIMulH, []> { let Latency = 3; }
defm : SBWriteResPair<WriteShift, [SBPort05], 1>;
defm : SBWriteResPair<WriteJump, [SBPort5], 1>;
+defm : SBWriteResPair<WriteCRC32, [SBPort1], 3, [1], 1, 5>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
@@ -678,8 +679,6 @@ def: InstRW<[SBWriteResGroup21], (instrs MUL8r, IMUL16rr, IMUL32rr, IMUL32rri, I
def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0",
"ADD_FST0r",
"ADD_FrST0",
- "CRC32r(16|32|64)r8",
- "CRC32r(16|32|64)r64",
"MMX_CVTPI2PSirr",
"MMX_CVTPS2PIirr",
"MMX_CVTTPS2PIirr",
@@ -1416,9 +1415,7 @@ def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> {
let NumMicroOps = 2;
let ResourceCycles = [1,1];
}
-def: InstRW<[SBWriteResGroup72], (instregex "CRC32r(16|32|64)m64",
- "CRC32r(16|32|64)m8",
- "FCOM32m",
+def: InstRW<[SBWriteResGroup72], (instregex "FCOM32m",
"FCOM64m",
"FCOMP32m",
"FCOMP64m")>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 78aec721d37..2a16982346c 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKLPort4]>;
defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op.
defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication.
defm : SKLWriteResPair<WriteIDiv, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; // Integer division.
+defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 8b4a394bc3f..c9a9c60ffc0 100755
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -106,6 +106,7 @@ def : WriteRes<WriteRMW, [SKXPort4]>;
defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op.
defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication.
defm : SKXWriteResPair<WriteIDiv, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; // Integer division.
+defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>;
def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads.
diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td
index b994a57e27a..85ca7a9b0ad 100644
--- a/llvm/lib/Target/X86/X86Schedule.td
+++ b/llvm/lib/Target/X86/X86Schedule.td
@@ -110,6 +110,9 @@ defm WriteCvtF2I : X86SchedWritePair; // Float -> Integer.
defm WriteCvtI2F : X86SchedWritePair; // Integer -> Float.
defm WriteCvtF2F : X86SchedWritePair; // Float -> Float size conversion.
+// CRC32 instruction.
+defm WriteCRC32 : X86SchedWritePair;
+
// Strings instructions.
// Packed Compare Implicit Length Strings, Return Mask
defm WritePCmpIStrM : X86SchedWritePair;
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 502c569f354..f3c18492d89 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -124,6 +124,7 @@ def : WriteRes<WriteRMW, [JSAGU]>;
defm : JWriteResIntPair<WriteALU, [JALU01], 1>;
defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication
defm : JWriteResIntPair<WriteIDiv, [JALU1, JDiv], 41, [1, 41], 2>; // Worst case (i64 division)
+defm : JWriteResIntPair<WriteCRC32, [JALU01], 3, [4], 3>;
def : WriteRes<WriteIMulH, [JALU1]> {
let Latency = 6;
@@ -190,22 +191,6 @@ def JWriteIDiv32Ld : SchedWriteRes<[JLAGU, JALU1, JDiv]> {
def : InstRW<[JWriteIDiv32], (instrs DIV32r, IDIV32r)>;
def : InstRW<[JWriteIDiv32Ld], (instrs DIV32m, IDIV32m)>;
-def JWriteCRC32 : SchedWriteRes<[JALU01]> {
- let Latency = 3;
- let ResourceCycles = [4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCRC32], (instrs CRC32r32r8, CRC32r32r16, CRC32r32r32,
- CRC32r64r8, CRC32r64r64)>;
-
-def JWriteCRC32Ld : SchedWriteRes<[JLAGU, JALU01]> {
- let Latency = 6;
- let ResourceCycles = [1, 4];
- let NumMicroOps = 3;
-}
-def : InstRW<[JWriteCRC32Ld], (instrs CRC32r32m8, CRC32r32m16, CRC32r32m32,
- CRC32r64m8, CRC32r64m64)>;
-
////////////////////////////////////////////////////////////////////////////////
// Integer shifts and rotates.
////////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 94eb5374a69..6fb7e9480c9 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -90,7 +90,8 @@ def : InstRW<[WriteMove], (instrs COPY)>;
defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>;
defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>;
defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>;
-defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
+defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>;
+defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>;
// This is for simple LEAs with one or two input operands.
// The complex ones can only execute on port 1, and they require two cycles on
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index bb5a851b64d..5459485b398 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -151,6 +151,7 @@ defm : ZnWriteResPair<WriteALU, [ZnALU], 1>;
defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>;
defm : ZnWriteResPair<WriteShift, [ZnALU], 1>;
defm : ZnWriteResPair<WriteJump, [ZnALU], 1>;
+defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>;
// Bit counts.
defm : ZnWriteResPair<WriteBitScan, [ZnALU], 3>;
OpenPOWER on IntegriCloud