diff options
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 9 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 7 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 7 | ||||
-rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 23 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86Schedule.td | 3 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleAtom.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx2-schedule.ll | 8 | ||||
-rwxr-xr-x | llvm/test/CodeGen/X86/avx512-schedule.ll | 2 |
12 files changed, 21 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 25a49cb9714..3b58b528097 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -191,7 +191,8 @@ def : WriteRes<WriteVecStore, [BWPort237, BWPort4]>; def : WriteRes<WriteVecMove, [BWPort015]>; defm : BWWriteResPair<WriteVecALU, [BWPort15], 1>; // Vector integer ALU op, no logicals. -defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1>; // Vector integer and/or/xor. +defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor. +defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM). defm : BWWriteResPair<WriteVecShift, [BWPort0], 1>; // Vector integer shifts. defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5>; // Vector integer multiply. defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // PMULLD @@ -1162,11 +1163,7 @@ def BWWriteResGroup77 : SchedWriteRes<[BWPort23,BWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[BWWriteResGroup77], (instregex "VPANDNYrm", - "VPANDYrm", - "VPBLENDDYrmi", - "VPORYrm", - "VPXORYrm")>; +def: InstRW<[BWWriteResGroup77], (instregex "VPBLENDDYrmi")>; def BWWriteResGroup79 : SchedWriteRes<[BWPort5,BWPort23]> { let Latency = 7; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index d9b7be006de..0c36c5be315 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -188,6 +188,7 @@ def : WriteRes<WriteVecMove, [HWPort015]>; defm : HWWriteResPair<WriteVecShift, [HWPort0], 1>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 6>; +defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>; defm : HWWriteResPair<WriteVecALU, [HWPort15], 1>; defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5>; defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>; @@ -1071,11 +1072,7 @@ def HWWriteResGroup17_2 : SchedWriteRes<[HWPort23,HWPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup17_2], (instregex "VPANDNYrm", - "VPANDYrm", - "VPBLENDDYrmi", - "VPORYrm", - "VPXORYrm")>; +def: InstRW<[HWWriteResGroup17_2], (instregex "VPBLENDDYrmi")>; def HWWriteResGroup18 : SchedWriteRes<[HWPort23,HWPort0156]> { let Latency = 6; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 2ae4bc093b0..f2d6a3f4a64 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -168,6 +168,7 @@ def : WriteRes<WriteVecMove, [SBPort05]>; defm : SBWriteResPair<WriteVecShift, [SBPort5], 1>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 6>; +defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>; defm : SBWriteResPair<WriteVecALU, [SBPort1], 3>; defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5>; defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; // TODO this is probably wrong for 256/512-bit for the "generic" model diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 5e02e43bd9f..3f68e927dc5 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -189,6 +189,7 @@ def : WriteRes<WriteVecMove, [SKLPort015]>; defm : SKLWriteResPair<WriteVecALU, [SKLPort15], 1>; // Vector integer ALU op, no logicals. defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. +defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). defm : SKLWriteResPair<WriteVecShift, [SKLPort0], 1>; // Vector integer shifts. defm : SKLWriteResPair<WriteVecIMul, [SKLPort0], 5>; // Vector integer multiply. defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; @@ -1609,17 +1610,13 @@ def: InstRW<[SKLWriteResGroup110], (instregex "VMASKMOVPDYrm", "VPADDDYrm", "VPADDQYrm", "VPADDWYrm", - "VPANDNYrm", - "VPANDYrm", "VPBLENDDYrmi", "VPMASKMOVDYrm", "VPMASKMOVQYrm", - "VPORYrm", "VPSUBBYrm", "VPSUBDYrm", "VPSUBQYrm", - "VPSUBWYrm", - "VPXORYrm")>; + "VPSUBWYrm")>; def SKLWriteResGroup112 : SchedWriteRes<[SKLPort0,SKLPort5,SKLPort23]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index a9693bf820c..cce237cfe50 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -189,6 +189,7 @@ def : WriteRes<WriteVecMove, [SKXPort015]>; defm : SKXWriteResPair<WriteVecALU, [SKXPort15], 1>; // Vector integer ALU op, no logicals. defm : SKXWriteResPair<WriteVecLogic, [SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor. +defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1>; // Vector integer shifts. defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 5>; // Vector integer multiply. defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector integer multiply. @@ -3034,16 +3035,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VPADDWYrm", "VPADDWZ256rm(b?)", "VPADDWZrm(b?)", - "VPANDDZ256rm(b?)", - "VPANDDZrm(b?)", - "VPANDNDZ256rm(b?)", - "VPANDNDZrm(b?)", - "VPANDNQZ256rm(b?)", - "VPANDNQZrm(b?)", - "VPANDNYrm", - "VPANDQZ256rm(b?)", - "VPANDQZrm(b?)", - "VPANDYrm", "VPBLENDDYrmi", "VPBLENDMBZ256rm(b?)", "VPBLENDMBZrm(b?)", @@ -3059,11 +3050,6 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VPBROADCASTQZm(b?)", "VPMASKMOVDYrm", "VPMASKMOVQYrm", - "VPORDZ256rm(b?)", - "VPORDZrm(b?)", - "VPORQZ256rm(b?)", - "VPORQZrm(b?)", - "VPORYrm", "VPSUBBYrm", "VPSUBBZ256rm(b?)", "VPSUBBZrm(b?)", @@ -3078,12 +3064,7 @@ def: InstRW<[SKXWriteResGroup121], (instregex "VBLENDMPDZ256rm(b?)", "VPTERNLOGDZ256rm(b?)i", "VPTERNLOGDZrm(b?)i", "VPTERNLOGQZ256rm(b?)i", - "VPTERNLOGQZrm(b?)i", - "VPXORDZ256rm(b?)", - "VPXORDZrm(b?)", - "VPXORQZ256rm(b?)", - "VPXORQZrm(b?)", - "VPXORYrm")>; + "VPTERNLOGQZrm(b?)i")>; def SKXWriteResGroup123 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort23]> { let Latency = 8; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 77e3fc51828..758f035e5ef 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -126,6 +126,7 @@ def WriteVecStore : SchedWrite; def WriteVecMove : SchedWrite; defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. +defm WriteVecLogicY: X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). defm WriteVecShift : X86SchedWritePair; // Vector integer shifts. defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply. defm WritePMULLD : X86SchedWritePair; // PMULLD @@ -225,7 +226,7 @@ def SchedWriteVecALU : X86SchedWriteWidths<WriteVecALU, WriteVecALU, WriteVecALU, WriteVecALU>; def SchedWriteVecLogic : X86SchedWriteWidths<WriteVecLogic, WriteVecLogic, - WriteVecLogic, WriteVecLogic>; + WriteVecLogicY, WriteVecLogicY>; def SchedWriteVecShift : X86SchedWriteWidths<WriteVecShift, WriteVecShift, WriteVecShift, WriteVecShift>; diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index c1c58f0d20d..3090d25b516 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -245,6 +245,7 @@ def : WriteRes<WriteVecMove, [AtomPort01]>; defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>; +defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index ffa8e091ec2..10a695d4b7f 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -407,6 +407,7 @@ defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>; +defm : JWriteResFpuPair<WriteVecLogicY, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVarShuffle256, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. defm : JWriteResFpuPair<WriteVarVecShift, [JFPU01, JVALU], 1>; // NOTE: Doesn't exist on Jaguar. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 6d2b7d1f22b..9ea9eb00cee 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -156,6 +156,7 @@ def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>; +defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>; // FIXME: The below is closer to correct, but caused some perf regressions. diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 76b7f1079b1..2e92c53b531 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -227,6 +227,7 @@ def : WriteRes<WriteVecLoad, [ZnAGU]> { let Latency = 8; } defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>; +defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WritePHAdd, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 53be4c53dcd..fee3c83a843 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -1368,7 +1368,7 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pand: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1417,7 +1417,7 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pandn: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [7:0.50] +; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [8:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -5039,7 +5039,7 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_por: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7065,7 +7065,7 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; GENERIC-LABEL: test_pxor: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [8:0.50] ; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 7fbc5177495..6e76b6ed579 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -2829,7 +2829,7 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33] ; GENERIC-NEXT: vpcmpgtd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # sched: [8:0.50] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: ubto8f32: |