diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-07 18:25:19 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-05-07 18:25:19 +0000 |
| commit | e480ed0b9f3b86c77efbf84505baf92cd95554ed (patch) | |
| tree | 90e3a11643c674dab0691af76631a63b87ae0da8 /llvm/lib | |
| parent | f64f345e1ba0ec613e78f1b709b95f74f67cc477 (diff) | |
| download | bcm5719-llvm-e480ed0b9f3b86c77efbf84505baf92cd95554ed.tar.gz bcm5719-llvm-e480ed0b9f3b86c77efbf84505baf92cd95554ed.zip | |
[X86][AVX2] Tag VPMOVSX/VPMOVZX ymm instructions as WriteShuffle256
These instructions behave more like cross-lane shuffles than regular shuffles; we already do this for the AVX512 equivalents.
Differential Revision: https://reviews.llvm.org/D46229
llvm-svn: 331659
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 17 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedBroadwell.td | 14 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 25 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 21 | ||||
| -rwxr-xr-x | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 17 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleZnver1.td | 9 |
6 files changed, 21 insertions, 82 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 4913bd486e4..fb422a74080 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -4881,26 +4881,29 @@ multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>, Sched<[sched.Folded]>; } -// FIXME: YMM cases should use SchedWriteShuffle.YMM. + multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, - X86SchedWriteWidths sched, Predicate prd> { - defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, sched.XMM>; + Predicate prd> { + defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, + SchedWriteShuffle.XMM>; let Predicates = [HasAVX, prd] in defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp, - VR128, VR128, sched.XMM>, VEX, VEX_WIG; + VR128, VR128, SchedWriteShuffle.XMM>, + VEX, VEX_WIG; let Predicates = [HasAVX2, prd] in defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp, - VR256, VR128, sched.XMM>, VEX, VEX_L, VEX_WIG; + VR256, VR128, WriteShuffle256>, + VEX, VEX_L, VEX_WIG; } multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp, X86MemOperand MemYOp, Predicate prd> { defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr), - MemOp, MemYOp, SchedWriteShuffle, prd>; + MemOp, MemYOp, prd>; defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), !strconcat("pmovzx", OpcodeStr), - MemOp, MemYOp, SchedWriteShuffle, prd>; + MemOp, MemYOp, prd>; } defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem, NoVLX_Or_NoBWI>; diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 936dd6e81a6..4c2aef0099b 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -687,19 +687,7 @@ def BWWriteResGroup28 : SchedWriteRes<[BWPort5]> { let 
ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup28], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def BWWriteResGroup30 : SchedWriteRes<[BWPort0156]> { let Latency = 2; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index b7de192b00e..b60c0d69af5 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1287,19 +1287,7 @@ def HWWriteResGroup51 : SchedWriteRes<[HWPort5]> { let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup51], (instregex "VPBROADCASTBrr", - "VPBROADCASTWrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "VPBROADCASTWrr")>; def HWWriteResGroup52 : SchedWriteRes<[HWPort1,HWPort23]> { let Latency = 9; @@ -1320,17 +1308,6 @@ def: InstRW<[HWWriteResGroup52_1], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "VCVTPS2DQYrm", "VCVTTPS2DQYrm")>; -def HWWriteResGroup53 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 10; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup53], (instregex "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm")>; - def HWWriteResGroup53_1 : SchedWriteRes<[HWPort5,HWPort23]> { let Latency = 9; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 9875ce32236..408b3ce3690 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -731,19 +731,7 @@ def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_FPrST0", 
"(ADD|SUB|SUBR)_FrST0", "VPBROADCASTBrr", "VPBROADCASTWrr", - "(V?)PCMPGTQ(Y?)rr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr")>; + "(V?)PCMPGTQ(Y?)rr")>; def SKLWriteResGroup31 : SchedWriteRes<[SKLPort0,SKLPort5]> { let Latency = 3; @@ -1558,12 +1546,7 @@ def SKLWriteResGroup133 : SchedWriteRes<[SKLPort5,SKLPort23]> { } def: InstRW<[SKLWriteResGroup133], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "ILD_F(16|32|64)m", - "VPCMPGTQYrm", - "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm")>; + "VPCMPGTQYrm")>; def SKLWriteResGroup134 : SchedWriteRes<[SKLPort01,SKLPort23]> { let Latency = 10; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 7bff6d0844a..00a1c2b9eff 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1062,18 +1062,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_FPrST0", "VPMINUQZ128rr", "VPMINUQZ256rr", "VPMINUQZrr", - "VPMOVSXBDYrr", - "VPMOVSXBQYrr", - "VPMOVSXBWYrr", - "VPMOVSXDQYrr", - "VPMOVSXWDYrr", - "VPMOVSXWQYrr", - "VPMOVZXBDYrr", - "VPMOVZXBQYrr", - "VPMOVZXBWYrr", - "VPMOVZXDQYrr", - "VPMOVZXWDYrr", - "VPMOVZXWQYrr", "VPSADBWZrr", // TODO: 512-bit ops require ports 0/1 to be joined. 
"VPTESTMBZ128rr", "VPTESTMBZ256rr", @@ -2603,11 +2591,6 @@ def: InstRW<[SKXWriteResGroup148], (instregex "(ADD|SUB|SUBR)_F(32|64)m", "VPMINSQZrm(b?)", "VPMINUQZ256rm(b?)", "VPMINUQZrm(b?)", - "VPMOVZXBDYrm", - "VPMOVZXBQYrm", - "VPMOVZXBWYrm", - "VPMOVZXDQYrm", - "VPMOVZXWQYrm", "VPTESTMBZ256rm(b?)", "VPTESTMBZrm(b?)", "VPTESTMDZ256rm(b?)", diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index c2325577962..9f0c637d0e6 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -940,15 +940,20 @@ def ZnWriteFPU12Y : SchedWriteRes<[ZnFPU12]> { let NumMicroOps = 2; } def ZnWriteFPU12m : SchedWriteRes<[ZnAGU, ZnFPU12]> ; +def ZnWriteFPU12Ym : SchedWriteRes<[ZnAGU, ZnFPU12]> { + let Latency = 8; + let NumMicroOps = 2; +} def : InstRW<[ZnWriteFPU12], (instregex "MMX_PACKSSDWirr", "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; def : InstRW<[ZnWriteFPU12m], (instregex "MMX_PACKSSDWirm", "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; -// VPMOVSX/ZX BW BD BQ DW DQ. +// VPMOVSX/ZX BW BD BQ WD WQ DQ. // y <- x. -def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Y], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrr")>; +def : InstRW<[ZnWriteFPU12Ym], (instregex "VPMOV(SX|ZX)(BW|BD|BQ|WD|WQ|DQ)Yrm")>; def ZnWriteFPU013 : SchedWriteRes<[ZnFPU013]> ; def ZnWriteFPU013Y : SchedWriteRes<[ZnFPU013]> { |

