diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-05-25 04:47:49 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-05-25 04:47:49 +0000 |
| commit | 4b08fcdeb13c0d6ebb32688e0b7b0915a1e5c9bd (patch) | |
| tree | 8d0f4d58bebb34b8372fef3159b2dac989bcff95 /llvm/lib | |
| parent | af6c9df163831b3a977d5dbaa25f2974baf13518 (diff) | |
| download | bcm5719-llvm-4b08fcdeb13c0d6ebb32688e0b7b0915a1e5c9bd.tar.gz bcm5719-llvm-4b08fcdeb13c0d6ebb32688e0b7b0915a1e5c9bd.zip | |
[X86] Add zero idioms to the haswell, broadwell, and skylake schedule models. Add 256-bit fp xor to sandybridge zero idioms
This copies the Sandy Bridge zero idiom support to later CPUs, adding the AVX2 and AVX512F/VL instructions as appropriate.
Differential Revision: https://reviews.llvm.org/D62360
llvm-svn: 361690
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedBroadwell.td | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedHaswell.td | 87 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeClient.td | 100 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86SchedSkylakeServer.td | 119 |
5 files changed, 395 insertions, 18 deletions
diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 60e2721c795..7574e4b8f89 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -889,8 +889,7 @@ def BWWriteResGroup47 : SchedWriteRes<[BWPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup47], (instregex "(V?)PCMPGTQ(Y?)rr", - "MUL_(FPrST0|FST0r|FrST0)")>; +def: InstRW<[BWWriteResGroup47], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; def BWWriteResGroup49 : SchedWriteRes<[BWPort23]> { let Latency = 5; @@ -1600,6 +1599,90 @@ def: InstRW<[BWWriteResGroup202], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; + +// Intruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Haswell and Broadwell Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. 
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def BWWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def BWWriteZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteALU]> +]>; +def : InstRW<[BWWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def BWWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogic]> +]>; +def : InstRW<[BWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def BWWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicY]> +]>; +def : InstRW<[BWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def BWWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicX]> +]>; +def : InstRW<[BWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def BWWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicY]> +]>; +def : InstRW<[BWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>; + +def BWWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUX]> +]>; +def : InstRW<[BWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def BWWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUY]> +]>; +def : 
InstRW<[BWWriteVZeroIdiomALUY], (instrs VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr, + VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def BWWritePCMPGTQ : SchedWriteRes<[BWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def BWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [BWWriteZeroLatency]>, + SchedVar<NoSchedPred, [BWWritePCMPGTQ]> +]>; +def : InstRW<[BWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use both Z and C flag require an extra uop. def BWWriteCMOVA_CMOVBErr : SchedWriteRes<[BWPort06,BWPort0156]> { let Latency = 2; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 6ddb542e415..284d1567c5c 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -1448,8 +1448,7 @@ def HWWriteResGroup89 : SchedWriteRes<[HWPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup89], (instregex "(V?)PCMPGTQ(Y?)rr", - "MUL_(FPrST0|FST0r|FrST0)")>; +def: InstRW<[HWWriteResGroup89], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; def HWWriteResGroup91_2 : SchedWriteRes<[HWPort0,HWPort23]> { let Latency = 11; @@ -1853,6 +1852,90 @@ def: InstRW<[HWWriteResGroup192], (instrs VGATHERQPSrm, def: InstRW<[WriteZero], (instrs CLC)>; + +// Intruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Haswell and Broadwell Pipeline" > "Register allocation and +// renaming". +// These can be investigated with llvm-exegesis, e.g. 
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def HWWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def HWWriteZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteALU]> +]>; +def : InstRW<[HWWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def HWWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogic]> +]>; +def : InstRW<[HWWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def HWWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicY]> +]>; +def : InstRW<[HWWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def HWWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicX]> +]>; +def : InstRW<[HWWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def HWWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicY]> +]>; +def : InstRW<[HWWriteVZeroIdiomLogicY], (instrs VPXORYrr)>; + +def HWWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUX]> +]>; +def : InstRW<[HWWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def HWWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUY]> +]>; +def : 
InstRW<[HWWriteVZeroIdiomALUY], (instrs VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr, + VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def HWWritePCMPGTQ : SchedWriteRes<[HWPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def HWWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [HWWriteZeroLatency]>, + SchedVar<NoSchedPred, [HWWritePCMPGTQ]> +]>; +def : InstRW<[HWWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // The 0x83 ADC/SBB opcodes have special support for immediate 0 to only require // a single uop. It does not apply to the GR8 encoding. And only applies to the // 8-bit immediate since using larger immediate for 0 would be silly. diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 842d67b5c82..d40bdf728a4 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -698,12 +698,6 @@ def SBWriteResGroup29_2 : SchedWriteRes<[SBPort5,SBPort015]> { } def: InstRW<[SBWriteResGroup29_2], (instrs PAUSE)>; -def SBWriteResGroup30 : SchedWriteRes<[SBPort0]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} - def SBWriteResGroup31 : SchedWriteRes<[SBPort23]> { let Latency = 5; let NumMicroOps = 1; @@ -1134,6 +1128,12 @@ def SBWriteFZeroIdiom : SchedWriteVariant<[ def : InstRW<[SBWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, VXORPDrr)>; +def SBWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicY]> +]>; +def : InstRW<[SBWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + def SBWriteVZeroIdiomLogicX : SchedWriteVariant<[ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>, SchedVar<NoSchedPred, [WriteVecLogicX]> @@ -1152,9 +1152,15 @@ def : InstRW<[SBWriteVZeroIdiomALUX], (instrs PSUBBrr, VPSUBBrr, PCMPGTDrr, 
VPCMPGTDrr, PCMPGTWrr, VPCMPGTWrr)>; +def SBWritePCMPGTQ : SchedWriteRes<[SBPort0]> { + let Latency = 5; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + def SBWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SBWriteZeroLatency]>, - SchedVar<NoSchedPred, [SBWriteResGroup30]> + SchedVar<NoSchedPred, [SBWritePCMPGTQ]> ]>; def : InstRW<[SBWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr)>; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 1119fd3fc11..8f3e4ae62d5 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -659,8 +659,7 @@ def SKLWriteResGroup9 : SchedWriteRes<[SKLPort015]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup9], (instregex "(V?)PADD(B|D|Q|W)(Y?)rr", - "VPBLENDD(Y?)rri", - "(V?)PSUB(B|D|Q|W)(Y?)rr")>; + "VPBLENDD(Y?)rri")>; def SKLWriteResGroup10 : SchedWriteRes<[SKLPort0156]> { let Latency = 1; @@ -770,8 +769,7 @@ def SKLWriteResGroup30 : SchedWriteRes<[SKLPort5]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup30], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", - "VPBROADCAST(B|W)rr", - "(V?)PCMPGTQ(Y?)rr")>; + "VPBROADCAST(B|W)rr")>; def SKLWriteResGroup32 : SchedWriteRes<[SKLPort0,SKLPort0156]> { let Latency = 3; @@ -1742,6 +1740,100 @@ def: InstRW<[SKLWriteResGroup223], (instrs FSTENVm)>; def: InstRW<[WriteZero], (instrs CLC)>; + +// Intruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Skylake Pipeline" > "Register allocation and renaming". +// These can be investigated with llvm-exegesis, e.g. 
+// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def SKLWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def SKLWriteZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteALU]> +]>; +def : InstRW<[SKLWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def SKLWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogic]> +]>; +def : InstRW<[SKLWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, XORPDrr, + VXORPDrr)>; + +def SKLWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicY]> +]>; +def : InstRW<[SKLWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr)>; + +def SKLWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicX]> +]>; +def : InstRW<[SKLWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr)>; + +def SKLWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicY]> +]>; +def : InstRW<[SKLWriteVZeroIdiomLogicY], (instrs VPXORYrr)>; + +def SKLWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUX]> +]>; +def : InstRW<[SKLWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def SKLWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUY]> +]>; +def : InstRW<[SKLWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; 
+ +def SKLWritePSUB : SchedWriteRes<[SKLPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKLWriteVZeroIdiomPSUB : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [SKLWritePSUB]> +]>; +def : InstRW<[SKLWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, + PSUBDrr, VPSUBDrr, + PSUBQrr, VPSUBQrr, + PSUBWrr, VPSUBWrr, + VPSUBBYrr, + VPSUBDYrr, + VPSUBQYrr, + VPSUBWYrr)>; + +def SKLWritePCMPGTQ : SchedWriteRes<[SKLPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKLWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKLWriteZeroLatency]>, + SchedVar<NoSchedPred, [SKLWritePCMPGTQ]> +]>; +def : InstRW<[SKLWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use both Z and C flag require an extra uop. def SKLWriteCMOVA_CMOVBErr : SchedWriteRes<[SKLPort06]> { let Latency = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index e3456073de3..58caf1dacfc 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -680,8 +680,7 @@ def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr", "VPBLENDMD(Z128|Z256)rr", "VPBLENDMQ(Z128|Z256)rr", "VPBLENDMW(Z128|Z256)rr", - "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr", - "(V?)PSUB(B|D|Q|W)rr", + "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rrk", "VPTERNLOGD(Z|Z128|Z256)rri", "VPTERNLOGQ(Z|Z128|Z256)rri")>; @@ -828,7 +827,6 @@ def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0 "VPCMPD(Z|Z128|Z256)rri", "VPCMPEQ(B|D|Q|W)(Z|Z128|Z256)rr", "VPCMPGT(B|D|Q|W)(Z|Z128|Z256)rr", - "(V?)PCMPGTQ(Y?)rr", "VPCMPQ(Z|Z128|Z256)rri", "VPCMPU(B|D|Q|W)(Z|Z128|Z256)rri", "VPCMPW(Z|Z128|Z256)rri", @@ -2458,6 +2456,121 @@ def: InstRW<[SKXWriteResGroup267], (instrs PAUSE)>; def: InstRW<[WriteZero], (instrs 
CLC)>; + +// Intruction variants handled by the renamer. These might not need execution +// ports in certain conditions. +// See Agner's Fog "The microarchitecture of Intel, AMD and VIA CPUs", +// section "Skylake Pipeline" > "Register allocation and renaming". +// These can be investigated with llvm-exegesis, e.g. +// echo 'pxor %mm0, %mm0' | /tmp/llvm-exegesis -mode=uops -snippets-file=- +// echo 'vxorpd %xmm0, %xmm0, %xmm1' | /tmp/llvm-exegesis -mode=uops -snippets-file=- + +def SKXWriteZeroLatency : SchedWriteRes<[]> { + let Latency = 0; +} + +def SKXWriteZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteALU]> +]>; +def : InstRW<[SKXWriteZeroIdiom], (instrs SUB32rr, SUB64rr, + XOR32rr, XOR64rr)>; + +def SKXWriteFZeroIdiom : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogic]> +]>; +def : InstRW<[SKXWriteFZeroIdiom], (instrs XORPSrr, VXORPSrr, + XORPDrr, VXORPDrr, + VXORPSZ128rr, + VXORPDZ128rr)>; + +def SKXWriteFZeroIdiomY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicY]> +]>; +def : InstRW<[SKXWriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr, + VXORPSZ256rr, VXORPDZ256rr)>; + +def SKXWriteFZeroIdiomZ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteFLogicZ]> +]>; +def : InstRW<[SKXWriteFZeroIdiomZ], (instrs VXORPSZrr, VXORPDZrr)>; + +def SKXWriteVZeroIdiomLogicX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicX]> +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicX], (instrs PXORrr, VPXORrr, + VPXORDZ128rr, VPXORQZ128rr)>; + +def SKXWriteVZeroIdiomLogicY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + 
SchedVar<NoSchedPred, [WriteVecLogicY]> +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicY], (instrs VPXORYrr, + VPXORDZ256rr, VPXORQZ256rr)>; + +def SKXWriteVZeroIdiomLogicZ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecLogicZ]> +]>; +def : InstRW<[SKXWriteVZeroIdiomLogicZ], (instrs VPXORDZrr, VPXORQZrr)>; + +def SKXWriteVZeroIdiomALUX : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUX]> +]>; +def : InstRW<[SKXWriteVZeroIdiomALUX], (instrs PCMPGTBrr, VPCMPGTBrr, + PCMPGTDrr, VPCMPGTDrr, + PCMPGTWrr, VPCMPGTWrr)>; + +def SKXWriteVZeroIdiomALUY : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [WriteVecALUY]> +]>; +def : InstRW<[SKXWriteVZeroIdiomALUY], (instrs VPCMPGTBYrr, + VPCMPGTDYrr, + VPCMPGTWYrr)>; + +def SKXWritePSUB : SchedWriteRes<[SKXPort015]> { + let Latency = 1; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKXWriteVZeroIdiomPSUB : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [SKXWritePSUB]> +]>; + +def : InstRW<[SKXWriteVZeroIdiomPSUB], (instrs PSUBBrr, VPSUBBrr, VPSUBBZ128rr, + PSUBDrr, VPSUBDrr, VPSUBDZ128rr, + PSUBQrr, VPSUBQrr, VPSUBQZ128rr, + PSUBWrr, VPSUBWrr, VPSUBWZ128rr, + VPSUBBYrr, VPSUBBZ256rr, + VPSUBDYrr, VPSUBDZ256rr, + VPSUBQYrr, VPSUBQZ256rr, + VPSUBWYrr, VPSUBWZ256rr, + VPSUBBZrr, + VPSUBDZrr, + VPSUBQZrr, + VPSUBWZrr)>; +def SKXWritePCMPGTQ : SchedWriteRes<[SKXPort5]> { + let Latency = 3; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} + +def SKXWriteVZeroIdiomPCMPGTQ : SchedWriteVariant<[ + SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [SKXWriteZeroLatency]>, + SchedVar<NoSchedPred, [SKXWritePCMPGTQ]> +]>; +def : InstRW<[SKXWriteVZeroIdiomPCMPGTQ], (instrs PCMPGTQrr, VPCMPGTQrr, + VPCMPGTQYrr)>; + + // CMOVs that use 
both Z and C flag require an extra uop. def SKXWriteCMOVA_CMOVBErr : SchedWriteRes<[SKXPort06]> { let Latency = 2; |

