diff options
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 49 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 19 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/schedule-x86_64.ll | 76 |
6 files changed, 119 insertions, 73 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 4f9c67431d7..9c178bb8ee0 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -821,7 +821,7 @@ def: InstRW<[BWWriteResGroup27], (instregex "ADD_FPrST0", "(V?)CVTTPS2DQ(Y?)rr")>; def BWWriteResGroup27_16 : SchedWriteRes<[BWPort1, BWPort0156]> { - let Latency = 3; // FIXME: I think this should be 4. + let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -1017,6 +1017,7 @@ def: InstRW<[BWWriteResGroup42], (instregex "MMX_CVTPD2PIirr", def BWWriteResGroup42_16 : SchedWriteRes<[BWPort1,BWPort06,BWPort0156]> { let Latency = 4; let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } def: InstRW<[BWWriteResGroup42_16], (instrs IMUL16r, MUL16r)>; @@ -1765,7 +1766,6 @@ def BWWriteResGroup91 : SchedWriteRes<[BWPort1,BWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup91], (instrs IMUL8m, MUL8m)>; def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm", "MMX_CVTPS2PIirm", "MMX_CVTTPS2PIirm", @@ -1786,15 +1786,16 @@ def: InstRW<[BWWriteResGroup91], (instregex "MMX_CVTPI2PSirm", "(V?)SUBSSrm")>; def BWWriteResGroup91_16 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { - let Latency = 8; // FIXME: I think this should be 9 + let Latency = 8; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; // TODO: Is IMUL16rm really 3 uops? +def: InstRW<[BWWriteResGroup91_16], (instrs IMUL16rmi, IMUL16rmi8)>; -def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort0156, BWPort23]> { - let Latency = 8; +def BWWriteResGroup91_16_2 : SchedWriteRes<[BWPort1, BWPort06, BWPort0156, BWPort23]> { + let Latency = 9; let NumMicroOps = 5; + let ResourceCycles = [1,1,2,1]; } def: InstRW<[BWWriteResGroup91_16_2], (instrs IMUL16m, MUL16m)>; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 20c4f811e38..32fec40595b 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1023,9 +1023,6 @@ def HWWriteResGroup12 : SchedWriteRes<[HWPort1,HWPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup12], (instrs MUL8m, MUL16m, - IMUL8m, IMUL16m, - IMUL16rm, IMUL16rmi, IMUL16rmi8, IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m", "FCOM64m", "FCOMP32m", @@ -1046,6 +1043,20 @@ def: InstRW<[HWWriteResGroup12], (instregex "FCOM32m", "(V?)SUBSDrm", "(V?)SUBSSrm")>; +def HWWriteResGroup12_1 : SchedWriteRes<[HWPort1,HWPort0156,HWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[HWWriteResGroup12_1], (instrs IMUL16rmi, IMUL16rmi8)>; + +def HWWriteResGroup12_2 : SchedWriteRes<[HWPort1,HWPort06,HWPort0156,HWPort23]> { + let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [1,1,2,1]; +} +def: InstRW<[HWWriteResGroup12_2], (instrs IMUL16m, MUL16m)>; + def HWWriteResGroup13 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 7; let NumMicroOps = 2; @@ -1703,7 +1714,6 @@ def HWWriteResGroup50 : SchedWriteRes<[HWPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup50], (instrs MUL8r, IMUL8r, IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>; def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0", "ADD_FST0r", "ADD_FrST0", @@ -1733,7 +1743,7 @@ def: InstRW<[HWWriteResGroup50], (instregex "ADD_FPrST0", "(V?)SUBSSrr")>; def HWWriteResGroup50_16i : SchedWriteRes<[HWPort1, HWPort0156]> { - let Latency = 3; + let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -2103,9 +2113,10 @@ def HWWriteResGroup74 : SchedWriteRes<[HWPort1,HWPort6]> { } def: InstRW<[HWWriteResGroup74], (instrs IMUL64r, MUL64r, MULX64rr)>; -def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort0156]> { +def HWWriteResGroup74_16 : SchedWriteRes<[HWPort1, HWPort06, HWPort0156]> { let Latency = 4; let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } def: InstRW<[HWWriteResGroup74_16], (instrs IMUL16r, MUL16r)>; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 8e56cdabfa4..d46fdb10735 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -646,7 +646,7 @@ def: InstRW<[SBWriteResGroup21], (instregex "ADD_FPrST0", "(V?)ROUNDSSr")>; def SBWriteResGroup21_16i : SchedWriteRes<[SBPort1, SBPort015]> { - let Latency = 3; + let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -743,13 +743,26 @@ def: InstRW<[SBWriteResGroup26_2], (instregex "COM_FIPr", "UCOM_FIPr", "UCOM_FIr")>; -// FIXME: this is probably incorrect. def SBWriteResGroup27 : SchedWriteRes<[SBPort0,SBPort1]> { let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup27], (instrs MUL16r, MUL32r, MUL64r)>; +def: InstRW<[SBWriteResGroup27], (instrs IMUL64r, MUL64r)>; + +def SBWriteResGroup27_1 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup27_1], (instrs IMUL32r, MUL32r)>; + +def SBWriteResGroup27_2 : SchedWriteRes<[SBPort1,SBPort05,SBPort015]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; +} +def: InstRW<[SBWriteResGroup27_2], (instrs IMUL16r, MUL16r)>; def SBWriteResGroup28 : SchedWriteRes<[SBPort1,SBPort5]> { let Latency = 4; @@ -1534,8 +1547,34 @@ def: InstRW<[SBWriteResGroup93], (instregex "CVTSD2SI64rm", "CVTTSD2SIrm", "CVTTSS2SI64rm", "CVTTSS2SIrm")>; -// FIXME this is probably incorrect. -def: InstRW<[SBWriteResGroup93], (instrs MUL16m, MUL32m, MUL64m)>; + +def SBWriteResGroup93_1 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup93_1], (instrs IMUL64m, MUL64m)>; + +def SBWriteResGroup93_2 : SchedWriteRes<[SBPort1,SBPort23,SBPort05,SBPort015]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1,1,1,1]; +} +def: InstRW<[SBWriteResGroup93_2], (instrs IMUL32m, MUL32m)>; + +def SBWriteResGroup93_3 : SchedWriteRes<[SBPort1,SBPort05,SBPort015,SBPort23]> { + let Latency = 9; + let NumMicroOps = 5; + let ResourceCycles = [1,1,2,1]; +} +def: InstRW<[SBWriteResGroup93_3], (instrs IMUL16m, MUL16m)>; + +def SBWriteResGroup93_4 : SchedWriteRes<[SBPort1,SBPort015,SBPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1,1,1]; +} +def: InstRW<[SBWriteResGroup93_4], (instrs IMUL16rmi, IMUL16rmi8)>; def SBWriteResGroup94 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { let Latency = 9; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 16fce1e607e..06b38703412 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -837,15 +837,13 @@ def SKLWriteResGroup29 : SchedWriteRes<[SKLPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKLWriteResGroup29], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>; -def: InstRW<[SKLWriteResGroup29], (instrs IMUL8r, MUL8r)>; def: InstRW<[SKLWriteResGroup29], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr", "SHLD(16|32|64)rri8", "SHRD(16|32|64)rri8")>; def SKLWriteResGroup29_16i : SchedWriteRes<[SKLPort1, SKLPort0156]> { - let Latency = 3; + let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -1097,12 +1095,12 @@ def SKLWriteResGroup51 : SchedWriteRes<[SKLPort1,SKLPort5]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, - MULX64rr)>; +def: InstRW<[SKLWriteResGroup51], (instrs IMUL64r, MUL64r, MULX64rr)>; def SKLWriteResGroup51_16 : SchedWriteRes<[SKLPort1,SKLPort06,SKLPort0156]> { let Latency = 4; let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } def: InstRW<[SKLWriteResGroup51_16], (instrs IMUL16r, MUL16r)>; @@ -1774,8 +1772,6 @@ def SKLWriteResGroup107 : SchedWriteRes<[SKLPort1,SKLPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKLWriteResGroup107], (instrs IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; -def: InstRW<[SKLWriteResGroup107], (instrs IMUL8m, MUL8m)>; def: InstRW<[SKLWriteResGroup107], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; @@ -1784,11 +1780,12 @@ def SKLWriteResGroup107_16 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; +def: InstRW<[SKLWriteResGroup107_16], (instrs IMUL16rmi, IMUL16rmi8)>; -def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort0156, SKLPort23]> { - let Latency = 8; +def SKLWriteResGroup107_16_2 : SchedWriteRes<[SKLPort1, SKLPort06, SKLPort0156, SKLPort23]> { + let Latency = 9; let NumMicroOps = 5; + let ResourceCycles = [1,1,2,1]; } def: InstRW<[SKLWriteResGroup107_16_2], (instrs IMUL16m, MUL16m)>; @@ -2232,7 +2229,7 @@ def SKLWriteResGroup142 : SchedWriteRes<[SKLPort1,SKLPort23,SKLPort06,SKLPort015 let NumMicroOps = 4; let ResourceCycles = [1,1,1,1]; } -def: InstRW<[SKLWriteResGroup142], (instrs IMUL32rm, MUL32m, MULX32rm)>; +def: InstRW<[SKLWriteResGroup142], (instrs IMUL32m, MUL32m, MULX32rm)>; def SKLWriteResGroup143 : SchedWriteRes<[SKLPort4,SKLPort6,SKLPort23,SKLPort237,SKLPort06,SKLPort0156]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 09c613366ad..196f6d97f11 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1650,15 +1650,13 @@ def SKXWriteResGroup31 : SchedWriteRes<[SKXPort1]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup31], (instrs IMUL16rr, IMUL32rr, IMUL32rri, IMUL32rri8, IMUL64rr, IMUL64rri32, IMUL64rri8)>; -def: InstRW<[SKXWriteResGroup31], (instrs IMUL8r, MUL8r)>; def: InstRW<[SKXWriteResGroup31], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr", "SHLD(16|32|64)rri8", "SHRD(16|32|64)rri8")>; def SKXWriteResGroup31_16i : SchedWriteRes<[SKXPort1, SKXPort0156]> { - let Latency = 3; + let Latency = 4; let NumMicroOps = 2; let ResourceCycles = [1,1]; } @@ -2438,6 +2436,7 @@ def: InstRW<[SKXWriteResGroup52], (instrs IMUL64r, MUL64r, MULX64rr)>; def SKXWriteResGroup52_16 : SchedWriteRes<[SKXPort1,SKXPort06,SKXPort0156]> { let Latency = 4; let NumMicroOps = 4; + let ResourceCycles = [1,1,2]; } def: InstRW<[SKXWriteResGroup52_16], (instrs IMUL16r, MUL16r)>; @@ -3702,8 +3701,6 @@ def SKXWriteResGroup118 : SchedWriteRes<[SKXPort1,SKXPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup118], (instrs IMUL32rm, IMUL32rmi, IMUL32rmi8, IMUL64rm, IMUL64rmi32, IMUL64rmi8)>; -def: InstRW<[SKXWriteResGroup118], (instrs IMUL8m, MUL8m)>; def: InstRW<[SKXWriteResGroup118], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; @@ -3714,9 +3711,10 @@ def SKXWriteResGroup118_16_1 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> } def: InstRW<[SKXWriteResGroup118_16_1], (instrs IMUL16rm, IMUL16rmi, IMUL16rmi8)>; -def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort0156, SKXPort23]> { - let Latency = 8; +def SKXWriteResGroup118_16_2 : SchedWriteRes<[SKXPort1, SKXPort06, SKXPort0156, SKXPort23]> { + let Latency = 9; let NumMicroOps = 5; + let ResourceCycles = [1,1,2,1]; } def: InstRW<[SKXWriteResGroup118_16_2], (instrs IMUL16m, MUL16m)>; diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 859591f7045..eaad28d6522 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -5624,15 +5624,15 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; GENERIC-LABEL: test_imul_16: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: imulw %di # sched: [3:1.00] -; GENERIC-NEXT: imulw (%rsi) # sched: [8:1.00] +; GENERIC-NEXT: imulw %di # sched: [4:1.33] +; GENERIC-NEXT: imulw (%rsi) # sched: [9:1.33] ; GENERIC-NEXT: imulw %di, %di # sched: [3:1.00] ; GENERIC-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; GENERIC-NEXT: imulw $511, %di, %di # imm = 0x1FF -; GENERIC-NEXT: # sched: [3:1.00] +; GENERIC-NEXT: # sched: [4:1.00] ; GENERIC-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; GENERIC-NEXT: # sched: [8:1.00] -; GENERIC-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; GENERIC-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; GENERIC-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -5672,15 +5672,15 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; SANDY-LABEL: test_imul_16: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: imulw %di # sched: [3:1.00] -; SANDY-NEXT: imulw (%rsi) # sched: [8:1.00] +; SANDY-NEXT: imulw %di # sched: [4:1.33] +; SANDY-NEXT: imulw (%rsi) # sched: [9:1.33] ; SANDY-NEXT: imulw %di, %di # sched: [3:1.00] ; SANDY-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; SANDY-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SANDY-NEXT: # sched: [3:1.00] +; SANDY-NEXT: # sched: [4:1.00] ; SANDY-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; SANDY-NEXT: # sched: [8:1.00] -; SANDY-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; SANDY-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; SANDY-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] @@ -5689,14 +5689,14 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP ; HASWELL-NEXT: imulw %di # sched: [4:1.00] -; HASWELL-NEXT: imulw (%rsi) # sched: [8:1.00] +; HASWELL-NEXT: imulw (%rsi) # sched: [9:1.00] ; HASWELL-NEXT: imulw %di, %di # sched: [3:1.00] ; HASWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; HASWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF -; HASWELL-NEXT: # sched: [3:1.00] +; HASWELL-NEXT: # sched: [4:1.00] ; HASWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; HASWELL-NEXT: # sched: [8:1.00] -; HASWELL-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; HASWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; HASWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retq # sched: [7:1.00] @@ -5705,14 +5705,14 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP ; BROADWELL-NEXT: imulw %di # sched: [4:1.00] -; BROADWELL-NEXT: imulw (%rsi) # sched: [8:1.00] +; BROADWELL-NEXT: imulw (%rsi) # sched: [9:1.00] ; BROADWELL-NEXT: imulw %di, %di # sched: [3:1.00] ; BROADWELL-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; BROADWELL-NEXT: imulw $511, %di, %di # imm = 0x1FF -; BROADWELL-NEXT: # sched: [3:1.00] +; BROADWELL-NEXT: # sched: [4:1.00] ; BROADWELL-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; BROADWELL-NEXT: # sched: [8:1.00] -; BROADWELL-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; BROADWELL-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; BROADWELL-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retq # sched: [7:1.00] @@ -5721,14 +5721,14 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP ; SKYLAKE-NEXT: imulw %di # sched: [4:1.00] -; SKYLAKE-NEXT: imulw (%rsi) # sched: [8:1.00] +; SKYLAKE-NEXT: imulw (%rsi) # sched: [9:1.00] ; SKYLAKE-NEXT: imulw %di, %di # sched: [3:1.00] ; SKYLAKE-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; SKYLAKE-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SKYLAKE-NEXT: # sched: [3:1.00] +; SKYLAKE-NEXT: # sched: [4:1.00] ; SKYLAKE-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; SKYLAKE-NEXT: # sched: [8:1.00] -; SKYLAKE-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; SKYLAKE-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; SKYLAKE-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retq # sched: [7:1.00] @@ -5737,14 +5737,14 @@ define void @test_imul_16(i16 %a0, i16* %a1) optsize { ; SKX: # %bb.0: ; SKX-NEXT: #APP ; SKX-NEXT: imulw %di # sched: [4:1.00] -; SKX-NEXT: imulw (%rsi) # sched: [8:1.00] +; SKX-NEXT: imulw (%rsi) # sched: [9:1.00] ; SKX-NEXT: imulw %di, %di # sched: [3:1.00] ; SKX-NEXT: imulw (%rsi), %di # sched: [8:1.00] ; SKX-NEXT: imulw $511, %di, %di # imm = 0x1FF -; SKX-NEXT: # sched: [3:1.00] +; SKX-NEXT: # sched: [4:1.00] ; SKX-NEXT: imulw $511, (%rsi), %di # imm = 0x1FF ; SKX-NEXT: # sched: [8:1.00] -; SKX-NEXT: imulw $7, %di, %di # sched: [3:1.00] +; SKX-NEXT: imulw $7, %di, %di # sched: [4:1.00] ; SKX-NEXT: imulw $7, (%rsi), %di # sched: [8:1.00] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] @@ -5787,8 +5787,8 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; GENERIC-LABEL: test_imul_32: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: imull %edi # sched: [3:1.00] -; GENERIC-NEXT: imull (%rsi) # sched: [8:1.00] +; GENERIC-NEXT: imull %edi # sched: [4:1.00] +; GENERIC-NEXT: imull (%rsi) # sched: [9:1.00] ; GENERIC-NEXT: imull %edi, %edi # sched: [3:1.00] ; GENERIC-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; GENERIC-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 @@ -5835,8 +5835,8 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; SANDY-LABEL: test_imul_32: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: imull %edi # sched: [3:1.00] -; SANDY-NEXT: imull (%rsi) # sched: [8:1.00] +; SANDY-NEXT: imull %edi # sched: [4:1.00] +; SANDY-NEXT: imull (%rsi) # sched: [9:1.00] ; SANDY-NEXT: imull %edi, %edi # sched: [3:1.00] ; SANDY-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; SANDY-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 @@ -5884,9 +5884,9 @@ define void @test_imul_32(i32 %a0, i32* %a1) optsize { ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP ; SKYLAKE-NEXT: imull %edi # sched: [4:1.00] -; SKYLAKE-NEXT: imull (%rsi) # sched: [8:1.00] +; SKYLAKE-NEXT: imull (%rsi) # sched: [9:1.00] ; SKYLAKE-NEXT: imull %edi, %edi # sched: [3:1.00] -; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [9:1.00] +; SKYLAKE-NEXT: imull (%rsi), %edi # sched: [8:1.00] ; SKYLAKE-NEXT: imull $665536, %edi, %edi # imm = 0xA27C0 ; SKYLAKE-NEXT: # sched: [3:1.00] ; SKYLAKE-NEXT: imull $665536, (%rsi), %edi # imm = 0xA27C0 @@ -5950,8 +5950,8 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; GENERIC-LABEL: test_imul_64: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: imulq %rdi # sched: [3:1.00] -; GENERIC-NEXT: imulq (%rsi) # sched: [8:1.00] +; GENERIC-NEXT: imulq %rdi # sched: [4:1.00] +; GENERIC-NEXT: imulq (%rsi) # sched: [9:1.00] ; GENERIC-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; GENERIC-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; GENERIC-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 @@ -5998,8 +5998,8 @@ define void @test_imul_64(i64 %a0, i64* %a1) optsize { ; SANDY-LABEL: test_imul_64: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: imulq %rdi # sched: [3:1.00] -; SANDY-NEXT: imulq (%rsi) # sched: [8:1.00] +; SANDY-NEXT: imulq %rdi # sched: [4:1.00] +; SANDY-NEXT: imulq (%rsi) # sched: [9:1.00] ; SANDY-NEXT: imulq %rdi, %rdi # sched: [3:1.00] ; SANDY-NEXT: imulq (%rsi), %rdi # sched: [8:1.00] ; SANDY-NEXT: imulq $665536, %rdi, %rdi # imm = 0xA27C0 @@ -7961,8 +7961,8 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; GENERIC-NEXT: #APP ; GENERIC-NEXT: mulb %dil # sched: [3:1.00] ; GENERIC-NEXT: mulb (%r8) # sched: [8:1.00] -; GENERIC-NEXT: mulw %si # sched: [4:1.00] -; GENERIC-NEXT: mulw (%r9) # sched: [9:1.00] +; GENERIC-NEXT: mulw %si # sched: [4:1.33] +; GENERIC-NEXT: mulw (%r9) # sched: [9:1.33] ; GENERIC-NEXT: mull %edx # sched: [4:1.00] ; GENERIC-NEXT: mull (%rax) # sched: [9:1.00] ; GENERIC-NEXT: mulq %rcx # sched: [4:1.00] @@ -8009,8 +8009,8 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SANDY-NEXT: #APP ; SANDY-NEXT: mulb %dil # sched: [3:1.00] ; SANDY-NEXT: mulb (%r8) # sched: [8:1.00] -; SANDY-NEXT: mulw %si # sched: [4:1.00] -; SANDY-NEXT: mulw (%r9) # sched: [9:1.00] +; SANDY-NEXT: mulw %si # sched: [4:1.33] +; SANDY-NEXT: mulw (%r9) # sched: [9:1.33] ; SANDY-NEXT: mull %edx # sched: [4:1.00] ; SANDY-NEXT: mull (%rax) # sched: [9:1.00] ; SANDY-NEXT: mulq %rcx # sched: [4:1.00] @@ -8026,7 +8026,7 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; HASWELL-NEXT: mulb %dil # sched: [3:1.00] ; HASWELL-NEXT: mulb (%r8) # sched: [8:1.00] ; HASWELL-NEXT: mulw %si # sched: [4:1.00] -; HASWELL-NEXT: mulw (%r9) # sched: [8:1.00] +; HASWELL-NEXT: mulw (%r9) # sched: [9:1.00] ; HASWELL-NEXT: mull %edx # sched: [4:1.00] ; HASWELL-NEXT: mull (%rax) # sched: [9:1.00] ; HASWELL-NEXT: mulq %rcx # sched: [4:1.00] @@ -8042,7 +8042,7 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; BROADWELL-NEXT: mulb %dil # sched: [3:1.00] ; BROADWELL-NEXT: mulb (%r8) # sched: [8:1.00] ; BROADWELL-NEXT: mulw %si # sched: [4:1.00] -; BROADWELL-NEXT: mulw (%r9) # sched: [8:1.00] +; BROADWELL-NEXT: mulw (%r9) # sched: [9:1.00] ; BROADWELL-NEXT: mull %edx # sched: [4:1.00] ; BROADWELL-NEXT: mull (%rax) # sched: [9:1.00] ; BROADWELL-NEXT: mulq %rcx # sched: [4:1.00] @@ -8058,7 +8058,7 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SKYLAKE-NEXT: mulb %dil # sched: [3:1.00] ; SKYLAKE-NEXT: mulb (%r8) # sched: [8:1.00] ; SKYLAKE-NEXT: mulw %si # sched: [4:1.00] -; SKYLAKE-NEXT: mulw (%r9) # sched: [8:1.00] +; SKYLAKE-NEXT: mulw (%r9) # sched: [9:1.00] ; SKYLAKE-NEXT: mull %edx # sched: [4:1.00] ; SKYLAKE-NEXT: mull (%rax) # sched: [9:1.00] ; SKYLAKE-NEXT: mulq %rcx # sched: [4:1.00] @@ -8074,7 +8074,7 @@ define void @test_mul(i8 %a0, i16 %a1, i32 %a2, i64 %a3, i8 *%p0, i16 *%p1, i32 ; SKX-NEXT: mulb %dil # sched: [3:1.00] ; SKX-NEXT: mulb (%r8) # sched: [8:1.00] ; SKX-NEXT: mulw %si # sched: [4:1.00] -; SKX-NEXT: mulw (%r9) # sched: [8:1.00] +; SKX-NEXT: mulw (%r9) # sched: [9:1.00] ; SKX-NEXT: mull %edx # sched: [4:1.00] ; SKX-NEXT: mull (%rax) # sched: [9:1.00] ; SKX-NEXT: mulq %rcx # sched: [4:1.00] |

