diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrArithmetic.td | 62 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 66 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 74 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 75 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 73 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 75 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 16 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 44 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 43 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/bmi2-schedule.ll | 8 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s | 8 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/Generic/resources-bmi2.s | 18 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s | 8 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s | 8 | ||||
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s | 8 |
17 files changed, 212 insertions, 402 deletions
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index c0711eaf071..f7cad2bc4ce 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -63,18 +63,18 @@ def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, GR8:$src)), - (implicit EFLAGS)]>, Sched<[WriteIMul]>; + (implicit EFLAGS)]>, Sched<[WriteIMul8]>; // AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src), "mul{w}\t$src", - []>, OpSize16, Sched<[WriteIMul]>; + []>, OpSize16, Sched<[WriteIMul16]>; // EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>, - OpSize32, Sched<[WriteIMul]>; + OpSize32, Sched<[WriteIMul32]>; // RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src), @@ -89,16 +89,16 @@ def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. [(set AL, (mul AL, (loadi8 addr:$src))), - (implicit EFLAGS)]>, SchedLoadReg<WriteIMul.Folded>; + (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8.Folded>; // AX,DX = AX*[mem16] let mayLoad = 1, hasSideEffects = 0 in { let Defs = [AX,DX,EFLAGS], Uses = [AX] in def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src), - "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>; + "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), - "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>; + "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), @@ -110,15 +110,15 @@ let hasSideEffects = 0 in { // AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>, - Sched<[WriteIMul]>; + Sched<[WriteIMul8]>; // AX,DX = AX*GR16 let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, - OpSize16, Sched<[WriteIMul]>; + OpSize16, Sched<[WriteIMul16]>; // EAX,EDX = EAX*GR32 let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>, - OpSize32, Sched<[WriteIMul]>; + OpSize32, Sched<[WriteIMul32]>; // RAX,RDX = RAX*GR64 let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>, @@ -128,15 +128,15 @@ let mayLoad = 1 in { // AL,AH = AL*[mem8] let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), - "imul{b}\t$src", []>, SchedLoadReg<WriteIMul.Folded>; + "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8.Folded>; // AX,DX = AX*[mem16] let Defs = [AX,DX,EFLAGS], Uses = [AX] in def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), - "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul.Folded>; + "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16.Folded>; // EAX,EDX = EAX*[mem32] let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), - "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul.Folded>; + "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32.Folded>; // RAX,RDX = RAX*[mem64] let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), @@ -156,18 +156,18 @@ def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, GR16:$src2))]>, - Sched<[WriteIMul]>, TB, OpSize16; + Sched<[WriteIMul16Reg]>, TB, OpSize16; def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, GR32:$src2))]>, - Sched<[WriteIMul]>, TB, OpSize32; + Sched<[WriteIMul32Reg]>, TB, OpSize32; def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, GR64:$src2))]>, - Sched<[WriteIMul64]>, TB; + Sched<[WriteIMul64Reg]>, TB; } // isCommutable // Register-Memory Signed Integer Multiply @@ -176,19 +176,19 @@ def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst), "imul{w}\t{$src2, $dst|$dst, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>, - Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize16; + Sched<[WriteIMul16Reg.Folded, ReadAfterLd]>, TB, OpSize16; def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), "imul{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>, - Sched<[WriteIMul.Folded, ReadAfterLd]>, TB, OpSize32; + Sched<[WriteIMul32Reg.Folded, ReadAfterLd]>, TB, OpSize32; def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), "imul{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>, - Sched<[WriteIMul64.Folded, ReadAfterLd]>, TB; + Sched<[WriteIMul64Reg.Folded, ReadAfterLd]>, TB; } // Constraints = "$src1 = $dst" } // Defs = [EFLAGS] @@ -201,37 +201,37 @@ def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, imm:$src2))]>, - Sched<[WriteIMul]>, OpSize16; + Sched<[WriteIMul16Imm]>, OpSize16; def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8 (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>, - Sched<[WriteIMul]>, OpSize16; + Sched<[WriteIMul16Imm]>, OpSize16; def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32 (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, imm:$src2))]>, - Sched<[WriteIMul]>, OpSize32; + Sched<[WriteIMul32Imm]>, OpSize32; def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8 (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>, - Sched<[WriteIMul]>, OpSize32; + Sched<[WriteIMul32Imm]>, OpSize32; def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32 (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>, - Sched<[WriteIMul64]>; + Sched<[WriteIMul64Imm]>; def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8 (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>, - Sched<[WriteIMul64]>; + Sched<[WriteIMul64Imm]>; // Memory-Integer Signed Integer Multiply def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 @@ -239,41 +239,41 @@ def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16 "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>, - Sched<[WriteIMul.Folded]>, OpSize16; + Sched<[WriteIMul16Imm.Folded]>, OpSize16; def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8 (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2), "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR16:$dst, EFLAGS, (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2))]>, - Sched<[WriteIMul.Folded]>, OpSize16; + Sched<[WriteIMul16Imm.Folded]>, OpSize16; def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32 (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>, - Sched<[WriteIMul.Folded]>, OpSize32; + Sched<[WriteIMul32Imm.Folded]>, OpSize32; def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8 (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2), "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR32:$dst, EFLAGS, (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2))]>, - Sched<[WriteIMul.Folded]>, OpSize32; + Sched<[WriteIMul32Imm.Folded]>, OpSize32; def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32 (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2))]>, - Sched<[WriteIMul64.Folded]>; + Sched<[WriteIMul64Imm.Folded]>; def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8 (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2), "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", [(set GR64:$dst, EFLAGS, (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2))]>, - Sched<[WriteIMul64.Folded]>; + Sched<[WriteIMul64Imm.Folded]>; } // Defs = [EFLAGS] // unsigned division/remainder @@ -1299,7 +1299,7 @@ let hasSideEffects = 0 in { let Predicates = [HasBMI2] in { let Uses = [EDX] in - defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul>; + defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteIMul32>; let Uses = [RDX] in defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteIMul64>, VEX_W; } diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 2e91426ac04..0efdd971690 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -108,8 +108,21 @@ def : WriteRes<WriteRMW, [BWPort237,BWPort4]>; // Arithmetic. defm : BWWriteResPair<WriteALU, [BWPort0156], 1>; // Simple integer ALU op. defm : BWWriteResPair<WriteADC, [BWPort06], 1>; // Integer ALU + flags op. -defm : BWWriteResPair<WriteIMul, [BWPort1], 3>; // Integer multiplication. -defm : BWWriteResPair<WriteIMul64, [BWPort1], 3>; // Integer 64-bit multiplication. + +// Integer multiplication. +defm : BWWriteResPair<WriteIMul8, [BWPort1], 3>; +defm : BWWriteResPair<WriteIMul16, [BWPort1,BWPort06,BWPort0156], 4, [1,1,2], 4>; +defm : X86WriteRes<WriteIMul16Imm, [BWPort1,BWPort0156], 4, [1,1], 2>; +defm : X86WriteRes<WriteIMul16ImmLd, [BWPort1,BWPort0156,BWPort23], 8, [1,1,1], 3>; +defm : BWWriteResPair<WriteIMul16Reg, [BWPort1], 3>; +defm : BWWriteResPair<WriteIMul32, [BWPort1,BWPort06,BWPort0156], 4, [1,1,1], 3>; +defm : BWWriteResPair<WriteIMul32Imm, [BWPort1], 3>; +defm : BWWriteResPair<WriteIMul32Reg, [BWPort1], 3>; +defm : BWWriteResPair<WriteIMul64, [BWPort1,BWPort5], 4, [1,1], 2>; +defm : BWWriteResPair<WriteIMul64Imm, [BWPort1], 3>; +defm : BWWriteResPair<WriteIMul64Reg, [BWPort1], 3>; +def : WriteRes<WriteIMulH, []> { let Latency = 3; } + defm : BWWriteResPair<WriteDiv8, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteDiv16, [BWPort0, BWDivider], 25, [1, 10]>; defm : BWWriteResPair<WriteDiv32, [BWPort0, BWDivider], 25, [1, 10]>; @@ -126,7 +139,6 @@ defm : X86WriteRes<WriteBSWAP64, [BWPort06, BWPort15], 2, [1, 1], 2>; defm : X86WriteRes<WriteXCHG, [BWPort0156], 2, [3], 3>; defm : BWWriteResPair<WriteCRC32, [BWPort1], 3>; -def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [BWPort15]>; // LEA instructions can't fold loads. @@ -735,13 +747,6 @@ def: InstRW<[BWWriteResGroup27], (instrs MMX_CVTPI2PSirr)>; def: InstRW<[BWWriteResGroup27], (instregex "P(DEP|EXT)(32|64)rr", "(V?)CVTDQ2PS(Y?)rr")>; -def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[BWWriteResGroup27_16], (instrs IMUL16rri, IMUL16rri8)>; - def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { let Latency = 3; let NumMicroOps = 1; @@ -818,8 +823,7 @@ def BWWriteResGroup42 : SchedWriteRes<[BWPort1,BWPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup42], (instrs IMUL64r, MUL64r, MULX64rr, - MMX_CVTPI2PDirr)>; +def: InstRW<[BWWriteResGroup42], (instrs MMX_CVTPI2PDirr)>; def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr", "MMX_CVT(T?)PS2PIirr", "(V?)CVTDQ2PDrr", @@ -830,13 +834,6 @@ def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVT(T?)PD2PIirr", "(V?)CVTSI2SSrr", "(V?)CVT(T?)PD2DQrr")>; -def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>; - def BWWriteResGroup43 : SchedWriteRes<[BWPort0,BWPort4,BWPort237]> { let Latency = 4; let NumMicroOps = 3; @@ -902,13 +899,6 @@ def BWWriteResGroup51 : SchedWriteRes<[BWPort1,BWPort6,BWPort06]> { } def: InstRW<[BWWriteResGroup51], (instregex "STR(16|32|64)r")>; -def BWWriteResGroup52 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup52], (instrs IMUL32r, MUL32r, MULX32rr)>; - def BWWriteResGroup54 : SchedWriteRes<[BWPort6,BWPort0156]> { let Latency = 5; let NumMicroOps = 5; @@ -1133,20 +1123,6 @@ def: InstRW<[BWWriteResGroup91], (instrs MMX_CVTPI2PSirm, VCVTDQ2PSrm)>; def: InstRW<[BWWriteResGroup91], (instregex "P(DEP|EXT)(32|64)rm")>; -def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>; - -def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> { - let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [1,1,2,1]; -} -def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>; - def BWWriteResGroup92 : SchedWriteRes<[BWPort5,BWPort23]> { let Latency = 8; let NumMicroOps = 2; @@ -1220,8 +1196,7 @@ def BWWriteResGroup107 : SchedWriteRes<[BWPort1,BWPort5,BWPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup107], (instrs IMUL64m, MUL64m, MULX64rm, - CVTPD2PSrm, +def: InstRW<[BWWriteResGroup107], (instrs CVTPD2PSrm, CVTPD2DQrm, CVTTPD2DQrm, MMX_CVTPI2PDirm)>; @@ -1273,13 +1248,6 @@ def BWWriteResGroup120 : SchedWriteRes<[BWPort0,BWPort1,BWPort5,BWPort23]> { } def: InstRW<[BWWriteResGroup120], (instregex "CVTTSS2SI64rm")>; -def BWWriteResGroup121 : SchedWriteRes<[BWPort1,BWPort23,BWPort06,BWPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[BWWriteResGroup121], (instrs IMUL32m, MUL32m, MULX32rm)>; - def BWWriteResGroup122_1 : SchedWriteRes<[BWPort0,BWFPDivider]> { let Latency = 11; let NumMicroOps = 1; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index f5d6334a0d6..e63916ed894 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -121,8 +121,20 @@ def : WriteRes<WriteZero, []>; // Arithmetic. defm : HWWriteResPair<WriteALU, [HWPort0156], 1>; defm : HWWriteResPair<WriteADC, [HWPort06, HWPort0156], 2, [1,1], 2>; -defm : HWWriteResPair<WriteIMul, [HWPort1], 3>; -defm : HWWriteResPair<WriteIMul64, [HWPort1], 3>; + +// Integer multiplication. +defm : HWWriteResPair<WriteIMul8, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul16, [HWPort1,HWPort06,HWPort0156], 4, [1,1,2], 4>; +defm : X86WriteRes<WriteIMul16Imm, [HWPort1,HWPort0156], 4, [1,1], 2>; +defm : X86WriteRes<WriteIMul16ImmLd, [HWPort1,HWPort0156,HWPort23], 8, [1,1,1], 3>; +defm : HWWriteResPair<WriteIMul16Reg, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul32, [HWPort1,HWPort06,HWPort0156], 4, [1,1,1], 3>; +defm : HWWriteResPair<WriteIMul32Imm, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul32Reg, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul64, [HWPort1,HWPort6], 4, [1,1], 2>; +defm : HWWriteResPair<WriteIMul64Imm, [HWPort1], 3>; +defm : HWWriteResPair<WriteIMul64Reg, [HWPort1], 3>; +def : WriteRes<WriteIMulH, []> { let Latency = 3; } defm : X86WriteRes<WriteBSWAP32, [HWPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [HWPort06, HWPort15], 2, [1,1], 2>; @@ -130,8 +142,6 @@ defm : X86WriteRes<WriteCMPXCHG,[HWPort06, HWPort0156], 5, [2,3], 5>; defm : X86WriteRes<WriteCMPXCHGRMW,[HWPort23,HWPort06,HWPort0156,HWPort237,HWPort4], 9, [1,2,1,1,1], 6>; defm : X86WriteRes<WriteXCHG, [HWPort0156], 2, [3], 3>; -def : WriteRes<WriteIMulH, []> { let Latency = 3; } - // Integer shifts and rotates. defm : HWWriteResPair<WriteShift, [HWPort06], 1>; defm : HWWriteResPair<WriteShiftCL, [HWPort06, HWPort0156], 3, [2,1], 3>; @@ -957,20 +967,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { def: InstRW<[HWWriteResGroup12], (instrs MMX_CVTPI2PSirm)>; def: InstRW<[HWWriteResGroup12], (instregex "P(DEP|EXT)(32|64)rm")>; -def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>; - -def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> { - let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [1,1,2,1]; -} -def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>; - def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 6; let NumMicroOps = 2; @@ -1221,13 +1217,6 @@ def: InstRW<[HWWriteResGroup50], (instrs MMX_CVTPI2PSirr)>; def: InstRW<[HWWriteResGroup50], (instregex "P(DEP|EXT)(32|64)rr", "(V?)CVTDQ2PS(Y?)rr")>; -def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup50_16i], (instrs IMUL16rri, IMUL16rri8)>; - def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> { let Latency = 3; let NumMicroOps = 1; @@ -1369,20 +1358,6 @@ def: InstRW<[HWWriteResGroup73], (instregex "(V?)CVTDQ2PDrr", "(V?)CVTSI2SSrr", "(V?)CVT(T?)PD2DQrr")>; -def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>; - -def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>; - def HWWriteResGroup75 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 11; let NumMicroOps = 3; @@ -1430,13 +1405,6 @@ def: InstRW<[HWWriteResGroup78_1], (instrs MMX_CVTPI2PDirm, CVTSD2SSrm, VCVTSD2SSrm)>; -def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup79], (instrs IMUL64m, MUL64m, MULX64rm)>; - def HWWriteResGroup80 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { let Latency = 9; let NumMicroOps = 3; @@ -1517,13 +1485,6 @@ def HWWriteResGroup94 : SchedWriteRes<[HWPort1,HWPort6,HWPort06]> { } def: InstRW<[HWWriteResGroup94], (instregex "STR(16|32|64)r")>; -def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup95], (instrs IMUL32r, MUL32r, MULX32rr)>; - def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { let Latency = 10; let NumMicroOps = 4; @@ -1531,13 +1492,6 @@ def HWWriteResGroup97 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { } def: InstRW<[HWWriteResGroup97], (instregex "CVTTSS2SI64rm")>; -def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort23,HWPort06,HWPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup98], (instrs IMUL32m, MUL32m, MULX32rm)>; - def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> { let Latency = 5; let NumMicroOps = 5; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index e9725b799a5..e01170a6b17 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -109,10 +109,21 @@ def : WriteRes<WriteZero, []>; // Arithmetic. defm : SBWriteResPair<WriteALU, [SBPort015], 1>; defm : SBWriteResPair<WriteADC, [SBPort05,SBPort015], 2, [1,1], 2>; -defm : SBWriteResPair<WriteIMul, [SBPort1], 3>; -defm : SBWriteResPair<WriteIMul64, [SBPort1], 3>; -defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>; +defm : SBWriteResPair<WriteIMul8, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul16, [SBPort1,SBPort05,SBPort015], 4, [1,1,2], 4>; +defm : X86WriteRes<WriteIMul16Imm, [SBPort1,SBPort015], 4, [1,1], 2>; +defm : X86WriteRes<WriteIMul16ImmLd, [SBPort1,SBPort015,SBPort23], 8, [1,1,1], 3>; +defm : SBWriteResPair<WriteIMul16Reg, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul32, [SBPort1,SBPort05,SBPort015], 4, [1,1,1], 3>; +defm : SBWriteResPair<WriteIMul32Imm, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul32Reg, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul64, [SBPort1,SBPort0], 4, [1,1], 2>; +defm : SBWriteResPair<WriteIMul64Imm, [SBPort1], 3>; +defm : SBWriteResPair<WriteIMul64Reg, [SBPort1], 3>; +def : WriteRes<WriteIMulH, []> { let Latency = 3; } + +defm : X86WriteRes<WriteXCHG, [SBPort015], 2, [3], 3>; defm : X86WriteRes<WriteBSWAP32, [SBPort1], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SBPort1, SBPort05], 2, [1,1], 2>; defm : X86WriteRes<WriteCMPXCHG, [SBPort05, SBPort015], 5, [1,3], 4>; @@ -127,8 +138,6 @@ defm : SBWriteResPair<WriteIDiv16, [SBPort0, SBDivider], 25, [1, 10]>; defm : SBWriteResPair<WriteIDiv32, [SBPort0, SBDivider], 25, [1, 10]>; defm : SBWriteResPair<WriteIDiv64, [SBPort0, SBDivider], 25, [1, 10]>; -def : WriteRes<WriteIMulH, []> { let Latency = 3; } - // SHLD/SHRD. defm : X86WriteRes<WriteSHDrri, [SBPort05, SBPort015], 2, [1, 1], 2>; defm : X86WriteRes<WriteSHDrrcl,[SBPort05, SBPort015], 4, [3, 1], 4>; @@ -641,13 +650,6 @@ def SBWriteResGroup21 : SchedWriteRes<[SBPort1]> { } def: InstRW<[SBWriteResGroup21], (instrs PUSHFS64)>; -def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup21_16i], (instrs IMUL16rri, IMUL16rri8)>; - def SBWriteResGroup22 : SchedWriteRes<[SBPort0,SBPort5]> { let Latency = 3; let NumMicroOps = 2; @@ -677,27 +679,6 @@ def SBWriteResGroup26_2 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { } def: InstRW<[SBWriteResGroup26_2], (instrs COM_FIPr, COM_FIr, UCOM_FIPr, UCOM_FIr)>; -def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>; - -def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>; - -def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>; - def SBWriteResGroup29 : SchedWriteRes<[SBPort1,SBPort015]> { let Latency = 4; let NumMicroOps = 2; @@ -1009,34 +990,6 @@ def SBWriteResGroup93 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { } def: InstRW<[SBWriteResGroup93], (instregex "CVT(T?)(SD|SS)2SI(64)?rm")>; -def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>; - -def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>; - -def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> { - let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [1,1,2,1]; -} -def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>; - -def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>; - def SBWriteResGroup95 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { let Latency = 9; let NumMicroOps = 3; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 5bad160440b..e05de50c04e 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -107,8 +107,20 @@ def : WriteRes<WriteRMW, [SKLPort237,SKLPort4]>; // Arithmetic. defm : SKLWriteResPair<WriteALU, [SKLPort0156], 1>; // Simple integer ALU op. defm : SKLWriteResPair<WriteADC, [SKLPort06], 1>; // Integer ALU + flags op. -defm : SKLWriteResPair<WriteIMul, [SKLPort1], 3>; // Integer multiplication. -defm : SKLWriteResPair<WriteIMul64, [SKLPort1], 3>; // Integer 64-bit multiplication. + +// Integer multiplication. +defm : SKLWriteResPair<WriteIMul8, [SKLPort1], 3>; +defm : SKLWriteResPair<WriteIMul16, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,2], 4>; +defm : X86WriteRes<WriteIMul16Imm, [SKLPort1,SKLPort0156], 4, [1,1], 2>; +defm : X86WriteRes<WriteIMul16ImmLd, [SKLPort1,SKLPort0156,SKLPort23], 8, [1,1,1], 3>; +defm : SKLWriteResPair<WriteIMul16Reg, [SKLPort1], 3>; +defm : SKLWriteResPair<WriteIMul32, [SKLPort1,SKLPort06,SKLPort0156], 4, [1,1,1], 3>; +defm : SKLWriteResPair<WriteIMul32Imm, [SKLPort1], 3>; +defm : SKLWriteResPair<WriteIMul32Reg, [SKLPort1], 3>; +defm : SKLWriteResPair<WriteIMul64, [SKLPort1,SKLPort5], 4, [1,1], 2>; +defm : SKLWriteResPair<WriteIMul64Imm, [SKLPort1], 3>; +defm : SKLWriteResPair<WriteIMul64Reg, [SKLPort1], 3>; +def : WriteRes<WriteIMulH, []> { let Latency = 3; } defm : X86WriteRes<WriteBSWAP32, [SKLPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SKLPort06, SKLPort15], 2, [1,1], 2>; @@ -127,7 +139,6 @@ defm : SKLWriteResPair<WriteIDiv64, [SKLPort0, SKLDivider], 25, [1,10], 1, 4>; defm : SKLWriteResPair<WriteCRC32, [SKLPort1], 3>; -def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads. defm : SKLWriteResPair<WriteCMOV, [SKLPort06], 1, [1], 1>; // Conditional move. @@ -738,13 +749,6 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>; -def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup29_16i], (instrs IMUL16rri, IMUL16rri8)>; - def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> { let Latency = 3; let NumMicroOps = 1; @@ -850,20 +854,6 @@ def SKLWriteResGroup48 : SchedWriteRes<[SKLPort01]> { def: InstRW<[SKLWriteResGroup48], (instregex "(V?)CVTDQ2PS(Y?)rr", "(V?)CVT(T?)PS2DQ(Y?)rr")>; -def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>; - -def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>; - def SKLWriteResGroup53 : SchedWriteRes<[SKLPort4,SKLPort5,SKLPort237]> { let Latency = 4; let NumMicroOps = 3; @@ -941,13 +931,6 @@ def SKLWriteResGroup61 : SchedWriteRes<[SKLPort1,SKLPort6,SKLPort06]> { } def: InstRW<[SKLWriteResGroup61], (instregex "STR(16|32|64)r")>; -def SKLWriteResGroup62 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup62], (instrs IMUL32r, MUL32r, MULX32rr)>; - def SKLWriteResGroup63 : SchedWriteRes<[SKLPort06,SKLPort0156]> { let Latency = 5; let NumMicroOps = 5; @@ -1218,20 +1201,6 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> { def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; -def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>; - -def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> { - let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [1,1,2,1]; -} -def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>; - def SKLWriteResGroup108 : SchedWriteRes<[SKLPort5,SKLPort23]> { let Latency = 8; let NumMicroOps = 2; @@ -1313,13 +1282,6 @@ def SKLWriteResGroup123 : SchedWriteRes<[SKLPort23,SKLPort01]> { def: InstRW<[SKLWriteResGroup123], (instregex "MMX_CVT(T?)PS2PIirm", "(V?)CVTPS2PDrm")>; -def SKLWriteResGroup127 : SchedWriteRes<[SKLPort1,SKLPort5,SKLPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKLWriteResGroup127], (instrs IMUL64m, MUL64m, MULX64rm)>; - def SKLWriteResGroup128 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { let Latency = 9; let NumMicroOps = 4; @@ -1377,13 +1339,6 @@ def SKLWriteResGroup140 : SchedWriteRes<[SKLPort5,SKLPort01,SKLPort23]> { def: InstRW<[SKLWriteResGroup140], (instrs VPHADDSWYrm, VPHSUBSWYrm)>; -def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>; - def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 10; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 02a537363ca..e2ced7dc38f 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -107,8 +107,21 @@ def : WriteRes<WriteRMW, [SKXPort237,SKXPort4]>; // Arithmetic. defm : SKXWriteResPair<WriteALU, [SKXPort0156], 1>; // Simple integer ALU op. defm : SKXWriteResPair<WriteADC, [SKXPort06], 1>; // Integer ALU + flags op. -defm : SKXWriteResPair<WriteIMul, [SKXPort1], 3>; // Integer multiplication. -defm : SKXWriteResPair<WriteIMul64, [SKXPort1], 3>; // Integer 64-bit multiplication. + +// Integer multiplication. +defm : SKXWriteResPair<WriteIMul8, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteIMul16, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,2], 4>; +defm : X86WriteRes<WriteIMul16Imm, [SKXPort1,SKXPort0156], 4, [1,1], 2>; +defm : X86WriteRes<WriteIMul16ImmLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>; +defm : X86WriteRes<WriteIMul16Reg, [SKXPort1], 3, [1], 1>; +defm : X86WriteRes<WriteIMul16RegLd, [SKXPort1,SKXPort0156,SKXPort23], 8, [1,1,1], 3>; +defm : SKXWriteResPair<WriteIMul32, [SKXPort1,SKXPort06,SKXPort0156], 4, [1,1,1], 3>; +defm : SKXWriteResPair<WriteIMul32Imm, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteIMul32Reg, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteIMul64, [SKXPort1,SKXPort5], 4, [1,1], 2>; +defm : SKXWriteResPair<WriteIMul64Imm, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteIMul64Reg, [SKXPort1], 3>; +def : WriteRes<WriteIMulH, []> { let Latency = 3; } defm : X86WriteRes<WriteBSWAP32, [SKXPort15], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SKXPort06, SKXPort15], 2, [1,1], 2>; @@ -127,7 +140,6 @@ defm : SKXWriteResPair<WriteIDiv64, [SKXPort0, SKXDivider], 25, [1,10], 1, 4>; defm : SKXWriteResPair<WriteCRC32, [SKXPort1], 3>; -def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part. def : WriteRes<WriteLEA, [SKXPort15]>; // LEA instructions can't fold loads. defm : SKXWriteResPair<WriteCMOV, [SKXPort06], 1, [1], 1>; // Conditional move. @@ -778,14 +790,6 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>; -def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup31_16i], (instrs IMUL16rri, IMUL16rri8)>; - - def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { let Latency = 3; let NumMicroOps = 1; @@ -969,20 +973,6 @@ def: InstRW<[SKXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr", "VPMOVUSWB(Z|Z128|Z256)rr", "VPMOVWB(Z|Z128|Z256)rr")>; -def SKXWriteResGroup52 : SchedWriteRes<[SKXPort1,SKXPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>; - -def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>; - def SKXWriteResGroup54 : SchedWriteRes<[SKXPort4,SKXPort5,SKXPort237]> { let Latency = 4; let NumMicroOps = 3; @@ -1070,13 +1060,6 @@ def SKXWriteResGroup63 : SchedWriteRes<[SKXPort1,SKXPort6,SKXPort06]> { } def: InstRW<[SKXWriteResGroup63], (instregex "STR(16|32|64)r")>; -def SKXWriteResGroup64 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup64], (instrs IMUL32r, MUL32r, MULX32rr)>; - def SKXWriteResGroup65 : SchedWriteRes<[SKXPort4,SKXPort237,SKXPort015]> { let Latency = 5; let NumMicroOps = 3; @@ -1519,20 +1502,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; -def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; - -def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> { - let Latency = 9; - let NumMicroOps = 5; - let ResourceCycles = [1,1,2,1]; -} -def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>; - def SKXWriteResGroup119 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 8; let NumMicroOps = 2; @@ -1741,13 +1710,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm", "(V?)CVTPS2PDrm")>; -def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SKXWriteResGroup142], (instrs IMUL64m, MUL64m, MULX64rm)>; - def SKXWriteResGroup143 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { let Latency = 9; let NumMicroOps = 4; @@ -1857,13 +1819,6 @@ def SKXWriteResGroup154 : SchedWriteRes<[SKXPort5,SKXPort01,SKXPort23]> { def: InstRW<[SKXWriteResGroup154], (instrs VPHADDSWYrm, VPHSUBSWYrm)>; -def SKXWriteResGroup156 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort06,SKXPort0156]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SKXWriteResGroup156], (instrs IMUL32m, MUL32m, MULX32rm)>; - def SKXWriteResGroup157 : SchedWriteRes<[SKXPort4,SKXPort6,SKXPort23,SKXPort237,SKXPort06,SKXPort0156]> { let Latency = 10; let NumMicroOps = 8; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 344a94f878d..538ba467c66 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -113,11 +113,21 @@ defm WriteALU : X86SchedWritePair; // Simple integer ALU op. defm WriteADC : X86SchedWritePair; // Integer ALU + flags op. def WriteALURMW : WriteSequence<[WriteALULd, WriteStore]>; def WriteADCRMW : WriteSequence<[WriteADCLd, WriteStore]>; -defm WriteIMul : X86SchedWritePair; // Integer multiplication. -defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication. -def WriteIMulH : SchedWrite; // Integer multiplication, high part. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. +// Integer multiplication +defm WriteIMul8 : X86SchedWritePair; // Integer 8-bit multiplication. +defm WriteIMul16 : X86SchedWritePair; // Integer 16-bit multiplication. +defm WriteIMul16Imm : X86SchedWritePair; // Integer 16-bit multiplication by immediate. +defm WriteIMul16Reg : X86SchedWritePair; // Integer 16-bit multiplication by register. +defm WriteIMul32 : X86SchedWritePair; // Integer 32-bit multiplication. +defm WriteIMul32Imm : X86SchedWritePair; // Integer 32-bit multiplication by immediate. +defm WriteIMul32Reg : X86SchedWritePair; // Integer 32-bit multiplication by register. +defm WriteIMul64 : X86SchedWritePair; // Integer 64-bit multiplication. +defm WriteIMul64Imm : X86SchedWritePair; // Integer 64-bit multiplication by immediate. +defm WriteIMul64Reg : X86SchedWritePair; // Integer 64-bit multiplication by register. +def WriteIMulH : SchedWrite; // Integer multiplication, high part. + def WriteBSWAP32 : SchedWrite; // Byte Order (Endianness) 32-bit Swap. def WriteBSWAP64 : SchedWrite; // Byte Order (Endianness) 64-bit Swap. defm WriteCMPXCHG : X86SchedWritePair; // Compare and set, compare and swap. diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 470ef1f069a..e175d6f5840 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -78,8 +78,18 @@ def : WriteRes<WriteRMW, [AtomPort0]>; defm : AtomWriteResPair<WriteALU, [AtomPort01], [AtomPort0]>; defm : AtomWriteResPair<WriteADC, [AtomPort01], [AtomPort0]>; -defm : AtomWriteResPair<WriteIMul, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>; -defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>; + +defm : AtomWriteResPair<WriteIMul8, [AtomPort01], [AtomPort01], 7, 7, [7], [7]>; +defm : AtomWriteResPair<WriteIMul16, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; +defm : AtomWriteResPair<WriteIMul16Imm, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : AtomWriteResPair<WriteIMul16Reg, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : AtomWriteResPair<WriteIMul32, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : AtomWriteResPair<WriteIMul32Imm, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : AtomWriteResPair<WriteIMul32Reg, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : AtomWriteResPair<WriteIMul64, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>; +defm : AtomWriteResPair<WriteIMul64Imm, [AtomPort01], [AtomPort01], 14, 14, [14], [14]>; +defm : AtomWriteResPair<WriteIMul64Reg, [AtomPort01], [AtomPort01], 12, 12, [12], [12]>; +defm : X86WriteResUnsupported<WriteIMulH>; defm : X86WriteRes<WriteXCHG, [AtomPort01], 2, [2], 1>; defm : X86WriteRes<WriteBSWAP32, [AtomPort0], 1, [1], 1>; @@ -113,30 +123,9 @@ def : WriteRes<WriteLAHFSAHF, [AtomPort01]> { } def : WriteRes<WriteBitTest,[AtomPort01]>; -defm : X86WriteResUnsupported<WriteIMulH>; - // This is for simple LEAs with one or two input operands. def : WriteRes<WriteLEA, [AtomPort1]>; -def AtomWriteIMul16Ld : SchedWriteRes<[AtomPort01]> { - let Latency = 8; - let ResourceCycles = [8]; -} -def : InstRW<[AtomWriteIMul16Ld], (instrs MUL16m, IMUL16m)>; - -def AtomWriteIMul32 : SchedWriteRes<[AtomPort01]> { - let Latency = 6; - let ResourceCycles = [6]; -} -def : InstRW<[AtomWriteIMul32], (instrs MUL32r, IMUL32r)>; - -def AtomWriteIMul64I : SchedWriteRes<[AtomPort01]> { - let Latency = 14; - let ResourceCycles = [14]; -} -def : InstRW<[AtomWriteIMul64I], (instrs IMUL64rri8, IMUL64rri32, - IMUL64rmi8, IMUL64rmi32)>; - // Bit counts. defm : AtomWriteResPair<WriteBSF, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>; defm : AtomWriteResPair<WriteBSR, [AtomPort01], [AtomPort01], 16, 16, [16], [16]>; @@ -505,12 +494,6 @@ def : SchedAlias<WriteADCRMW, AtomWrite0_1>; def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m", "MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>; -def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> { - let Latency = 5; - let ResourceCycles = [5]; -} -def : InstRW<[AtomWrite0_5], (instregex "IMUL32(rm|rr)")>; - // Port1 def AtomWrite1_1 : SchedWriteRes<[AtomPort1]> { let Latency = 1; @@ -621,8 +604,7 @@ def : InstRW<[AtomWrite01_6], (instrs CMPXCHG8rm, INTO, XLAT, SHLD16rri8, SHRD16rri8, SHLD16mrCL, SHRD16mrCL, SHLD16mri8, SHRD16mri8)>; -def : InstRW<[AtomWrite01_6], (instregex "IMUL16rr", - "IST_F(P)?(16|32|64)?m", +def : InstRW<[AtomWrite01_6], (instregex "IST_F(P)?(16|32|64)?m", "MMX_PH(ADD|SUB)S?Wrm")>; def AtomWrite01_7 : SchedWriteRes<[AtomPort01]> { diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index d7fb6a32888..d4edbc4046f 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -164,9 +164,6 @@ def : WriteRes<WriteRMW, [JSAGU]>; defm : JWriteResIntPair<WriteALU, [JALU01], 1>; defm : JWriteResIntPair<WriteADC, [JALU01], 1, [2]>; -defm : JWriteResIntPair<WriteIMul, [JALU1, JMul], 3, [1, 1], 2>; // i8/i16/i32 multiplication -defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; // i64 multiplication -defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>; defm : X86WriteRes<WriteBSWAP32, [JALU01], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [JALU01], 1, [1], 1>; @@ -174,6 +171,18 @@ defm : X86WriteRes<WriteCMPXCHG,[JALU01], 1, [1], 1>; defm : X86WriteRes<WriteCMPXCHGRMW,[JALU01, JSAGU, JLAGU], 4, [1, 1, 1], 2>; defm : X86WriteRes<WriteXCHG, [JALU01], 1, [1], 1>; +defm : JWriteResIntPair<WriteIMul8, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul16, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul16Imm, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul16Reg, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul32, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul32Imm, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul32Reg, [JALU1, JMul], 3, [1, 1], 2>; +defm : JWriteResIntPair<WriteIMul64, [JALU1, JMul], 6, [1, 4], 2>; +defm : JWriteResIntPair<WriteIMul64Imm, [JALU1, JMul], 6, [1, 4], 2>; +defm : JWriteResIntPair<WriteIMul64Reg, [JALU1, JMul], 6, [1, 4], 2>; +defm : X86WriteRes<WriteIMulH, [JALU1], 6, [4], 1>; + defm : JWriteResIntPair<WriteDiv8, [JALU1, JDiv], 12, [1, 12], 1>; defm : JWriteResIntPair<WriteDiv16, [JALU1, JDiv], 17, [1, 17], 2>; defm : JWriteResIntPair<WriteDiv32, [JALU1, JDiv], 25, [1, 25], 2>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 2ed7a8d7ee0..18200f78e89 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -95,8 +95,17 @@ def : InstRW<[WriteMove], (instrs COPY)>; defm : SLMWriteResPair<WriteALU, [SLM_IEC_RSV01], 1>; defm : SLMWriteResPair<WriteADC, [SLM_IEC_RSV01], 1>; -defm : SLMWriteResPair<WriteIMul, [SLM_IEC_RSV1], 3>; -defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>; + +defm : SLMWriteResPair<WriteIMul8, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul16, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul16Imm, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul16Reg, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul32, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul32Imm, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul32Reg, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul64, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul64Imm, [SLM_IEC_RSV1], 3>; +defm : SLMWriteResPair<WriteIMul64Reg, [SLM_IEC_RSV1], 3>; defm : X86WriteRes<WriteBSWAP32, [SLM_IEC_RSV01], 1, [1], 1>; defm : X86WriteRes<WriteBSWAP64, [SLM_IEC_RSV01], 1, [1], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 4539d0159b6..f3d2aa1fb0d 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -177,8 +177,17 @@ def : WriteRes<WriteZero, []>; def : WriteRes<WriteLEA, [ZnALU]>; defm : ZnWriteResPair<WriteALU, [ZnALU], 1>; defm : ZnWriteResPair<WriteADC, [ZnALU], 1>; -defm : ZnWriteResPair<WriteIMul, [ZnALU1, ZnMultiplier], 4>; -defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; + +defm : ZnWriteResPair<WriteIMul8, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul16, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul16Imm, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul16Reg, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul32, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul32Imm, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul32Reg, [ZnALU1, ZnMultiplier], 4>; +//defm : ZnWriteResPair<WriteIMul64, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; +//defm : ZnWriteResPair<WriteIMul64Imm, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; +//defm : ZnWriteResPair<WriteIMul64Reg, [ZnALU1, ZnMultiplier], 4, [1,1], 2>; defm : X86WriteRes<WriteBSWAP32, [ZnALU], 1, [4], 1>; defm : X86WriteRes<WriteBSWAP64, [ZnALU], 1, [4], 1>; @@ -581,45 +590,51 @@ def : InstRW<[WriteALULd], def ZnWriteMul16 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; } -def : InstRW<[ZnWriteMul16], (instrs IMUL16r, MUL16r)>; -def : InstRW<[ZnWriteMul16], (instrs IMUL16rr, IMUL16rri, IMUL16rri8)>; // TODO: is this right? -def : InstRW<[ZnWriteMul16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul16, ZnWriteMul16>; +def : SchedAlias<WriteIMul16Imm, ZnWriteMul16>; // TODO: is this right? +def : SchedAlias<WriteIMul16Reg, ZnWriteMul16>; // TODO: is this right? +def : SchedAlias<WriteIMul16ImmLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul16RegLd, ZnWriteMul16>; // TODO: this is definitely wrong but matches what the instregex did. // m16. def ZnWriteMul16Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } -def : InstRW<[ZnWriteMul16Ld, ReadAfterLd], (instrs IMUL16m, MUL16m)>; +def : SchedAlias<WriteIMul16Ld, ZnWriteMul16Ld>; // r32. def ZnWriteMul32 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 3; } -def : InstRW<[ZnWriteMul32], (instrs IMUL32r, MUL32r)>; -def : InstRW<[ZnWriteMul32], (instrs IMUL32rr, IMUL32rri, IMUL32rri8)>; // TODO: is this right? -def : InstRW<[ZnWriteMul32], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul32, ZnWriteMul32>; +def : SchedAlias<WriteIMul32Imm, ZnWriteMul32>; // TODO: is this right? +def : SchedAlias<WriteIMul32Reg, ZnWriteMul32>; // TODO: is this right? +def : SchedAlias<WriteIMul32ImmLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul32RegLd, ZnWriteMul32>; // TODO: this is definitely wrong but matches what the instregex did. // m32. def ZnWriteMul32Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 8; } -def : InstRW<[ZnWriteMul32Ld, ReadAfterLd], (instrs IMUL32m, MUL32m)>; +def : SchedAlias<WriteIMul32Ld, ZnWriteMul32Ld>; // r64. def ZnWriteMul64 : SchedWriteRes<[ZnALU1, ZnMultiplier]> { let Latency = 4; let NumMicroOps = 2; } -def : InstRW<[ZnWriteMul64], (instrs IMUL64r, MUL64r)>; -def : InstRW<[ZnWriteMul64], (instrs IMUL64rr, IMUL64rri8, IMUL64rri32)>; // TODO: is this right? -def : InstRW<[ZnWriteMul64], (instrs IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul64, ZnWriteMul64>; +def : SchedAlias<WriteIMul64Imm, ZnWriteMul64>; // TODO: is this right? +def : SchedAlias<WriteIMul64Reg, ZnWriteMul64>; // TODO: is this right? +def : SchedAlias<WriteIMul64ImmLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did. +def : SchedAlias<WriteIMul64RegLd, ZnWriteMul64>; // TODO: this is definitely wrong but matches what the instregex did. // m64. def ZnWriteMul64Ld : SchedWriteRes<[ZnAGU, ZnALU1, ZnMultiplier]> { let Latency = 9; let NumMicroOps = 2; } -def : InstRW<[ZnWriteMul64Ld, ReadAfterLd], (instrs IMUL64m, MUL64m)>; +def : SchedAlias<WriteIMul64Ld, ZnWriteMul64Ld>; // MULX. // r32,r32,r32. diff --git a/llvm/test/CodeGen/X86/bmi2-schedule.ll b/llvm/test/CodeGen/X86/bmi2-schedule.ll index 8df3c670d4d..5232e51ac73 100644 --- a/llvm/test/CodeGen/X86/bmi2-schedule.ll +++ b/llvm/test/CodeGen/X86/bmi2-schedule.ll @@ -110,8 +110,8 @@ define void @test_mulx_i32(i32 %a0, i32 %a1, i32* %a2) optsize { ; GENERIC-LABEL: test_mulx_i32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [3:1.00] -; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [8:1.00] +; GENERIC-NEXT: mulxl %esi, %esi, %edi # sched: [4:1.00] +; GENERIC-NEXT: mulxl (%rdx), %esi, %edi # sched: [9:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -163,8 +163,8 @@ define i64 @test_mulx_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movq %rdx, %rax # sched: [1:0.33] ; GENERIC-NEXT: movq %rdi, %rdx # sched: [1:0.33] -; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [3:1.00] -; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [8:1.00] +; GENERIC-NEXT: mulxq %rsi, %rsi, %rcx # sched: [4:1.00] +; GENERIC-NEXT: mulxq (%rax), %rdx, %rax # sched: [9:1.00] ; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s index 4f043e33240..4721b94b7ca 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-bmi2.s @@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx # CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-bmi2.s index d0a4e3e3701..6c7382aef25 100644 --- a/llvm/test/tools/llvm-mca/X86/Generic/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-bmi2.s @@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 2 6 1.00 * bzhil %eax, (%rbx), %ecx # CHECK-NEXT: 1 1 1.00 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 6 1.00 * bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 2 3 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 3 8 1.00 * mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: 2 3 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 3 8 1.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 1 0.33 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 2 6 0.50 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 1 0.33 pdepq %rax, %rbx, %rcx @@ -103,7 +103,7 @@ shrx %rax, (%rbx), %rcx # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 10.67 10.67 - 10.67 8.00 8.00 +# CHECK-NEXT: - - 14.33 11.33 - 12.33 8.00 8.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -111,10 +111,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bzhil %eax, (%rbx), %ecx # CHECK-NEXT: - - - 1.00 - - - - bzhiq %rax, %rbx, %rcx # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: - - - 1.00 - - - - mulxl %eax, %ebx, %ecx -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: - - - 1.00 - - - - mulxq %rax, %rbx, %rcx -# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: - - 0.83 1.33 - 0.83 - - mulxl %eax, %ebx, %ecx +# CHECK-NEXT: - - 0.83 1.33 - 0.83 0.50 0.50 mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - mulxq %rax, %rbx, %rcx +# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 mulxq (%rax), %rbx, %rcx # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pdepl %eax, %ebx, %ecx # CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pdepl (%rax), %ebx, %ecx # CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pdepq %rax, %rbx, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s index 980e48141aa..d0be7bf824f 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-bmi2.s @@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx # CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s index a39b7843e17..062c782bac5 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-bmi2.s @@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx # CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s index d4af18b4c19..f5e41354fba 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-bmi2.s @@ -62,10 +62,10 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: 2 6 0.50 * bzhil %eax, (%rbx), %ecx # CHECK-NEXT: 1 1 0.50 bzhiq %rax, %rbx, %rcx # CHECK-NEXT: 2 6 0.50 * bzhiq %rax, (%rbx), %rcx -# CHECK-NEXT: 3 4 1.00 mulxl %eax, %ebx, %ecx -# CHECK-NEXT: 4 9 1.00 * mulxl (%rax), %ebx, %ecx -# CHECK-NEXT: 2 4 1.00 mulxq %rax, %rbx, %rcx -# CHECK-NEXT: 3 9 1.00 * mulxq (%rax), %rbx, %rcx +# CHECK-NEXT: 4 4 1.00 mulxl %eax, %ebx, %ecx +# CHECK-NEXT: 5 9 1.00 * mulxl (%rax), %ebx, %ecx +# CHECK-NEXT: 3 4 1.00 mulxq %rax, %rbx, %rcx +# CHECK-NEXT: 4 9 1.00 * mulxq (%rax), %rbx, %rcx # CHECK-NEXT: 1 3 1.00 pdepl %eax, %ebx, %ecx # CHECK-NEXT: 2 8 1.00 * pdepl (%rax), %ebx, %ecx # CHECK-NEXT: 1 3 1.00 pdepq %rax, %rbx, %rcx |

