diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-08 14:55:16 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-08 14:55:16 +0000 |
| commit | 2864b46469818a7cd0ccc8bdee74e91c59775810 (patch) | |
| tree | 922ac5bd03eb5e2c6523f9fbaca4ed457e5d43d8 /llvm/lib/Target/X86 | |
| parent | 739d1a68aa0e537b98058998d137718a7d9403cb (diff) | |
| download | bcm5719-llvm-2864b46469818a7cd0ccc8bdee74e91c59775810.tar.gz bcm5719-llvm-2864b46469818a7cd0ccc8bdee74e91c59775810.zip | |
[X86] Split off WriteIMul64 from WriteIMul schedule class (PR36931)
This fixes a couple of missing BtVer2 instructions that weren't being handled in the override.
NOTE: There are still a lot of overrides that need cleaning up!
llvm-svn: 331770
Diffstat (limited to 'llvm/lib/Target/X86')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrArithmetic.td | 87 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 5 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 5 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 11 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 10 |
11 files changed, 84 insertions, 105 deletions
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 5f423250006..44f6da16d1d 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -80,7 +80,7 @@ let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), "mul{q}\t$src", [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>, - Sched<[WriteIMul]>; + Sched<[WriteIMul64]>; // AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), @@ -89,20 +89,20 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>, SchedLoadReg<WriteIMulLd>; + (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>; // AX,DX = AX*[mem16] let mayLoad = 1, hasSideEffects = 0 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), - "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMulLd>; + "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), - "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMulLd>; + "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), - "mul{q}\t$src", []>, SchedLoadReg<WriteIMulLd>, + "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>, Requires<[In64BitMode]>; } @@ -122,25 +122,25 @@ def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>, // RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>, - Sched<[WriteIMul]>; + Sched<[WriteIMul64]>; 
let mayLoad = 1 in { // AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>, SchedLoadReg<WriteIMulLd>; + "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>; // AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMulLd>; + "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMulLd>; + "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), - "imul{q}\t$src", []>, SchedLoadReg<WriteIMulLd>, + "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64.Folded>, Requires<[In64BitMode]>; } } // hasSideEffects @@ -149,135 +149,133 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), let Defs = [EFLAGS] in { let Constraints = "$src1 = $dst" in { -let isCommutable = 1, SchedRW = [WriteIMul] in { +let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y // Register-Register Signed Integer Multiply def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, - (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize16; + (X86smul_flag GR16:$src1, GR16:$src2))]>, + Sched<[WriteIMul]>, TB, OpSize16; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, - (X86smul_flag GR32:$src1, GR32:$src2))]>, TB, OpSize32; + (X86smul_flag GR32:$src1, GR32:$src2))]>, + Sched<[WriteIMul]>, TB, OpSize32; def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, 
$dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, GR64:$src2))]>, TB; -} // isCommutable, SchedRW + (X86smul_flag GR64:$src1, GR64:$src2))]>, + Sched<[WriteIMul64]>, TB; +} // isCommutable // Register-Memory Signed Integer Multiply -let SchedRW = [WriteIMulLd, ReadAfterLd] in { def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), (ins GR16:$src1, i16mem:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>, - TB, OpSize16; + Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>, - TB, OpSize32; + Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>, - TB; -} // SchedRW + Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB; } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] // Surprisingly enough, these are not two address instructions! 
let Defs = [EFLAGS] in { -let SchedRW = [WriteIMul] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, imm:$src2))]>, - OpSize16; + Sched<[WriteIMul]>, OpSize16; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, - OpSize16; + Sched<[WriteIMul]>, OpSize16; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, imm:$src2))]>, - OpSize32; + Sched<[WriteIMul]>, OpSize32; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>, - OpSize32; + Sched<[WriteIMul]>, OpSize32; def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>; + (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>, + Sched<[WriteIMul64]>; def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, - (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>; -} // SchedRW + (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>, + Sched<[WriteIMul64]>; // Memory-Integer Signed Integer Multiply -let SchedRW = [WriteIMulLd] in { def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 (outs GR16:$dst), (ins 
i16mem:$src1, i16imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>, - OpSize16; + Sched<[WriteIMul.Folded]>, OpSize16; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2))]>, - OpSize16; + Sched<[WriteIMul.Folded]>, OpSize16; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>, - OpSize32; + Sched<[WriteIMul.Folded]>, OpSize32; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2))]>, - OpSize32; + Sched<[WriteIMul.Folded]>, OpSize32; def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (loadi64 addr:$src1), - i64immSExt32:$src2))]>; + i64immSExt32:$src2))]>, + Sched<[WriteIMul64.Folded]>; def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (loadi64 addr:$src1), - i64immSExt8:$src2))]>; -} // SchedRW + i64immSExt8:$src2))]>, + Sched<[WriteIMul64.Folded]>; } // Defs = [EFLAGS] - - - // unsigned division/remainder let hasSideEffects = 1 in { // so that we don't speculatively execute let Defs = [AL,AH,EFLAGS], Uses = [AX] in @@ -1262,25 +1260,26 @@ let Predicates = [HasBMI], AddedComplexity = -6 in { 
//===----------------------------------------------------------------------===// // MULX Instruction // -multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop> { +multiclass bmi_mulx<string mnemonic, RegisterClass RC, X86MemOperand x86memop, + X86FoldableSchedWrite sched> { let hasSideEffects = 0 in { let isCommutable = 1 in def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V, Sched<[WriteIMul, WriteIMulH]>; + []>, T8XD, VEX_4V, Sched<[sched, WriteIMulH]>; let mayLoad = 1 in def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), - []>, T8XD, VEX_4V, Sched<[WriteIMulLd, WriteIMulH]>; + []>, T8XD, VEX_4V, Sched<[sched.Folded, WriteIMulH]>; } } let Predicates = [HasBMI2] in { let Uses = [EDX] in - defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>; + defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>; let Uses = [RDX] in - defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W; + defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2c42c4fff09..d97efee6697 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -106,8 +106,9 @@ multiclass BWWriteResPair<X86FoldableSchedWrite SchedRW, def : WriteRes<WriteRMW, [BWPort237,BWPort4]>; // Arithmetic. -defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op. -defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication. +defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op. +defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication. 
+defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication. defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b30d403b68f..5a612d18b19 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -112,12 +112,13 @@ def : WriteRes<WriteLoad, [HWPort23]> { let Latency = 5; } def : WriteRes<WriteMove, [HWPort0156]>; def : WriteRes<WriteZero, []>; -defm : HWWriteResPair<WriteALU, [HWPort0156], 1>; -defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; +defm : HWWriteResPair<WriteALU, [HWPort0156], 1>; +defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>; def : WriteRes<WriteIMulH, []> { let Latency = 3; } -defm : HWWriteResPair<WriteShift, [HWPort06], 1>; -defm : HWWriteResPair<WriteJump, [HWPort06], 1>; -defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>; +defm : HWWriteResPair<WriteShift, [HWPort06], 1>; +defm : HWWriteResPair<WriteJump, [HWPort06], 1>; +defm : HWWriteResPair<WriteCRC32, [HWPort1], 3>; defm : HWWriteResPair<WriteCMOV, [HWPort06,HWPort0156], 2, [1,1], 2>; // Conditional move. def : WriteRes<WriteSETCC, [HWPort06]>; // Setcc. 
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 10b5794a1f5..41f6d10a849 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -101,8 +101,9 @@ def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; } def : WriteRes<WriteMove, [SBPort015]>; def : WriteRes<WriteZero, []>; -defm : SBWriteResPair<WriteALU, [SBPort015], 1>; -defm : SBWriteResPair<WriteIMul, [SBPort1], 3>; +defm : SBWriteResPair<WriteALU, [SBPort015], 1>; +defm : SBWriteResPair<WriteIMul, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>; defm : SBWriteResPair<WriteDiv8, [SBPort0, SBDivider], 25, [1, 10]>; defm : SBWriteResPair<WriteDiv16, [SBPort0, SBDivider], 25, [1, 10]>; @@ -1062,13 +1063,6 @@ def: InstRW<[SBWriteResGroup69], (instregex "BTC(16|32|64)mi8", "SHR(8|16|32|64)m1", "SHR(8|16|32|64)mi")>; -def SBWriteResGroup72 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup72], (instrs MUL8m)>; - def SBWriteResGroup77 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { let Latency = 8; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 3a4c9383ab3..fa145d94a6d 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -105,8 +105,9 @@ multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW, def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>; // Arithmetic. -defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op. -defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication. +defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op. +defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication. +defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication. 
defm : SKLWriteResPair<WriteDiv8, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; defm : SKLWriteResPair<WriteDiv16, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index aa9e9e78cc5..06999374394 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -105,8 +105,9 @@ multiclass SKXWriteResPair<X86FoldableSchedWrite SchedRW, def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>; // Arithmetic. -defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op. -defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication. +defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op. +defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication. +defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication. defm : SKXWriteResPair<WriteDiv8, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; defm : SKXWriteResPair<WriteDiv16, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index cd8b8ad5de5..3a5f324ea59 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -74,11 +74,12 @@ def WriteStore : SchedWrite; def WriteMove : SchedWrite; // Arithmetic. -defm WriteALU : X86SchedWritePair; // Simple integer ALU op. -def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; -defm WriteIMul : X86SchedWritePair; // Integer multiplication. -def WriteIMulH : SchedWrite; // Integer multiplication, high part. -def WriteLEA : SchedWrite; // LEA instructions can't fold loads. +defm WriteALU : X86SchedWritePair; // Simple integer ALU op. +def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; +defm WriteIMul : X86SchedWritePair; // Integer multiplication. +defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication. 
+def WriteIMulH : SchedWrite; // Integer multiplication, high part. +def WriteLEA : SchedWrite; // LEA instructions can't fold loads. // Integer division. defm WriteDiv8 : X86SchedWritePair; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 5d35802e672..91ebc5f32cb 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -76,8 +76,9 @@ def : WriteRes<WriteRMW, [AtomPort0]>; // Arithmetic. //////////////////////////////////////////////////////////////////////////////// -defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>; -defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>; +defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>; +defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>; +defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>; defm : AtomWriteResPair<WriteDiv8, [AtomPort01], [AtomPort01], 50, 68, [50], [68]>; defm : AtomWriteResPair<WriteDiv16, [AtomPort01], [AtomPort01], 50, 50, [50], [50]>; @@ -115,13 +116,6 @@ def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> { } def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>; -def AtomWriteIMul64 : SchedWriteRes<[AtomPort01]> { - let Latency = 12; - let ResourceCycles = [12]; -} -def : InstRW<[AtomWriteIMul64], (instrs MUL64r, IMUL64r, IMUL64rr, IMUL64rm, - MUL64m, IMUL64m)>; - def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> { let Latency = 14; let ResourceCycles = [14]; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index f28d58b2d03..4004eff4593 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -157,6 +157,8 @@ def : WriteRes<WriteRMW, [JSAGU]>; defm : JWriteResIntPair<WriteALU, [JALU01], 1>; defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication +defm : 
JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication +defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>; defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>; defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>; @@ -173,11 +175,6 @@ defm : JWriteResIntPair<WriteCMOV, [JALU01], 1>; // Conditional move. def : WriteRes<WriteSETCC, [JALU01]>; // Setcc. def : WriteRes<WriteSETCCStore, [JALU01,JSAGU]>; -def : WriteRes<WriteIMulH, [JALU1]> { - let Latency = 6; - let ResourceCycles = [4]; -} - // This is for simple LEAs with one or two input operands. // FIXME: SAGU 3-operand LEA def : WriteRes<WriteLEA, [JALU01]>; @@ -192,19 +189,6 @@ defm : JWriteResIntPair<WriteTZCNT, [JALU01], 2, [2]>; defm : JWriteResIntPair<WriteBEXTR, [JALU01], 1>; defm : JWriteResIntPair<WriteBZHI, [JALU01], 1>; // NOTE: Doesn't exist on Jaguar. -def JWriteIMul64 : SchedWriteRes<[JALU1, JMul]> { - let Latency = 6; - let ResourceCycles = [1, 4]; - let NumMicroOps = 2; -} -def JWriteIMul64Ld : SchedWriteRes<[JLAGU, JALU1, JMul]> { - let Latency = 9; - let ResourceCycles = [1, 1, 4]; - let NumMicroOps = 2; -} -def : InstRW<[JWriteIMul64], (instrs MUL64r, IMUL64r)>; -def : InstRW<[JWriteIMul64Ld], (instrs MUL64m, IMUL64m)>; - //////////////////////////////////////////////////////////////////////////////// // Integer shifts and rotates. //////////////////////////////////////////////////////////////////////////////// diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index b5e3507bb1d..92430746a8b 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -92,11 +92,12 @@ def : WriteRes<WriteLDMXCSR, [SLM_MEC_RSV]> { let Latency = 3; } // Treat misc copies as a move. 
def : InstRW<[WriteMove], (instrs COPY)>; -defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>; -defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>; -defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>; -defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>; -defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>; +defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteShift, [SLM_IEC_RSV0], 1>; +defm : SLMWriteResPair<WriteJump, [SLM_IEC_RSV1], 1>; +defm : SLMWriteResPair<WriteCRC32, [SLM_IEC_RSV1], 3>; defm : SLMWriteResPair<WriteCMOV, [SLM_IEC_RSV01], 2, [2]>; def : WriteRes<WriteSETCC, [SLM_IEC_RSV01]>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 7a289ecba6d..b7abbee2652 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -100,7 +100,8 @@ def : ReadAdvance<ReadAfterLd, 4>; // This multiclass is for folded loads for integer units. multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW, list<ProcResourceKind> ExePorts, - int Lat, list<int> Res = [], int UOps = 1> { + int Lat, list<int> Res = [], int UOps = 1, + int LoadLat = 4, int LoadUOps = 1> { // Register variant takes 1-cycle on Execution Port. def : WriteRes<SchedRW, ExePorts> { let Latency = Lat; @@ -109,11 +110,11 @@ multiclass ZnWriteResPair<X86FoldableSchedWrite SchedRW, } // Memory variant also uses a cycle on ZnAGU - // adds 4 cycles to the latency. + // adds LoadLat cycles to the latency (default = 4). 
def : WriteRes<SchedRW.Folded, !listconcat([ZnAGU], ExePorts)> { - let Latency = !add(Lat, 4); + let Latency = !add(Lat, LoadLat); let ResourceCycles = !if(!empty(Res), [], !listconcat([1], Res)); - let NumMicroOps = !add(UOps, 1); + let NumMicroOps = !add(UOps, LoadUOps); } } @@ -150,6 +151,7 @@ def : WriteRes<WriteZero, []>; def : WriteRes<WriteLEA, [ZnALU]>; defm : ZnWriteResPair<WriteALU, [ZnALU], 1>; defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>; +defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; defm : ZnWriteResPair<WriteShift, [ZnALU], 1>; defm : ZnWriteResPair<WriteJump, [ZnALU], 1>; defm : ZnWriteResFpuPair<WriteCRC32, [ZnFPU0], 3>; |

