diff options
author | Clement Courbet <courbet@google.com> | 2018-06-11 14:37:53 +0000 |
---|---|---|
committer | Clement Courbet <courbet@google.com> | 2018-06-11 14:37:53 +0000 |
commit | 7db69cc08aef49192ea86d4fc0fe7210ad2b5e50 (patch) | |
tree | 9b04c0688b2f371c6d75f2c549c3b8b4ce6b909f | |
parent | 2c543e775f0b94a8dff01645c16d854b955e2c43 (diff) | |
download | bcm5719-llvm-7db69cc08aef49192ea86d4fc0fe7210ad2b5e50.tar.gz bcm5719-llvm-7db69cc08aef49192ea86d4fc0fe7210ad2b5e50.zip |
[X86] Fix skylake server scheduling info.
Summary:
This fixes most of the scheduling info for SKX vector operations.
I had to split a lot of the YMM/ZMM classes into separate classes for YMM and ZMM.
The before/after llvm-exegesis analyses are in the Phabricator diff.
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D47721
llvm-svn: 334407
36 files changed, 2077 insertions, 1644 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 818a322762c..0be0b11b0d1 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -8014,8 +8014,8 @@ multiclass avx512_cvtph2ps_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, let Predicates = [HasAVX512] in defm VCVTPH2PSZ : avx512_cvtph2ps<v16f32_info, v16i16x_info, f256mem, loadv4i64, - WriteCvtPH2PSY>, - avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSY>, + WriteCvtPH2PSZ>, + avx512_cvtph2ps_sae<v16f32_info, v16i16x_info, WriteCvtPH2PSZ>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { @@ -8068,8 +8068,8 @@ multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src, let Predicates = [HasAVX512] in { defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem, - WriteCvtPS2PHY, WriteCvtPS2PHYSt>, - avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PH>, + WriteCvtPS2PHZ, WriteCvtPS2PHZSt>, + avx512_cvtps2ph_sae<v16i16x_info, v16f32_info, WriteCvtPS2PHZ>, EVEX, EVEX_V512, EVEX_CD8<32, CD8VH>; let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph<v8i16x_info, v8f32x_info, f128mem, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 47c3702c538..b8f1856dc60 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -189,25 +189,31 @@ defm : X86WriteRes<WriteFMoveY, [BWPort5], 1, [1], 1>; defm : BWWriteResPair<WriteFAdd, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub. defm : BWWriteResPair<WriteFAddX, [BWPort1], 3, [1], 1, 5>; // Floating point add/sub (XMM). defm : BWWriteResPair<WriteFAddY, [BWPort1], 3, [1], 1, 6>; // Floating point add/sub (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : BWWriteResPair<WriteFAdd64, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub. 
defm : BWWriteResPair<WriteFAdd64X, [BWPort1], 3, [1], 1, 5>; // Floating point double add/sub (XMM). defm : BWWriteResPair<WriteFAdd64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double add/sub (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : BWWriteResPair<WriteFCmp, [BWPort1], 3, [1], 1, 5>; // Floating point compare. defm : BWWriteResPair<WriteFCmpX, [BWPort1], 3, [1], 1, 5>; // Floating point compare (XMM). defm : BWWriteResPair<WriteFCmpY, [BWPort1], 3, [1], 1, 6>; // Floating point compare (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : BWWriteResPair<WriteFCmp64, [BWPort1], 3, [1], 1, 5>; // Floating point double compare. defm : BWWriteResPair<WriteFCmp64X, [BWPort1], 3, [1], 1, 5>; // Floating point double compare (XMM). defm : BWWriteResPair<WriteFCmp64Y, [BWPort1], 3, [1], 1, 6>; // Floating point double compare (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : BWWriteResPair<WriteFCom, [BWPort1], 3>; // Floating point compare to flags. defm : BWWriteResPair<WriteFMul, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication. defm : BWWriteResPair<WriteFMulX, [BWPort01], 3, [1], 1, 5>; // Floating point multiplication (XMM). defm : BWWriteResPair<WriteFMulY, [BWPort01], 3, [1], 1, 6>; // Floating point multiplication (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : BWWriteResPair<WriteFMul64, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication. defm : BWWriteResPair<WriteFMul64X, [BWPort01], 3, [1], 1, 5>; // Floating point double multiplication (XMM). defm : BWWriteResPair<WriteFMul64Y, [BWPort01], 3, [1], 1, 6>; // Floating point double multiplication (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFMul64Z>; //defm : BWWriteResPair<WriteFDiv, [BWPort0,BWFPDivider], 11, [1,3], 1, 5>; // Floating point division. defm : BWWriteResPair<WriteFDivX, [BWPort0,BWFPDivider], 11, [1,5], 1, 5>; // Floating point division (XMM). 
@@ -233,34 +239,45 @@ defm : BWWriteResPair<WriteFSqrt80, [BWPort0,BWFPDivider], 23, [1,9]>; // Float defm : BWWriteResPair<WriteFRcp, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate. defm : BWWriteResPair<WriteFRcpX, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal estimate (XMM). defm : BWWriteResPair<WriteFRcpY, [BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal estimate (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFRcpZ>; defm : BWWriteResPair<WriteFRsqrt, [BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate. defm : BWWriteResPair<WriteFRsqrtX,[BWPort0], 5, [1], 1, 5>; // Floating point reciprocal square root estimate (XMM). defm : BWWriteResPair<WriteFRsqrtY,[BWPort0,BWPort015], 11, [2,1], 3, 6>; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : BWWriteResPair<WriteFMA, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add. defm : BWWriteResPair<WriteFMAX, [BWPort01], 5, [1], 1, 5>; // Fused Multiply Add (XMM). defm : BWWriteResPair<WriteFMAY, [BWPort01], 5, [1], 1, 6>; // Fused Multiply Add (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : BWWriteResPair<WriteDPPD, [BWPort0,BWPort1,BWPort5], 9, [1,1,1], 3, 5>; // Floating point double dot product. defm : BWWriteResPair<WriteDPPS, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 5>; // Floating point single dot product. defm : BWWriteResPair<WriteDPPSY, [BWPort0,BWPort1,BWPort5], 14, [2,1,1], 4, 6>; // Floating point single dot product (YMM). +defm : X86WriteResPairUnsupported<WriteDPPSZ>; defm : BWWriteResPair<WriteFSign, [BWPort5], 1>; // Floating point fabs/fchs. defm : X86WriteRes<WriteFRnd, [BWPort23], 6, [1], 1>; // Floating point rounding. defm : X86WriteRes<WriteFRndY, [BWPort23], 6, [1], 1>; // Floating point rounding (YMM/ZMM). 
+defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : X86WriteRes<WriteFRndLd, [BWPort1,BWPort23], 11, [2,1], 3>; defm : X86WriteRes<WriteFRndYLd, [BWPort1,BWPort23], 12, [2,1], 3>; defm : BWWriteResPair<WriteFLogic, [BWPort5], 1, [1], 1, 5>; // Floating point and/or/xor logicals. defm : BWWriteResPair<WriteFLogicY, [BWPort5], 1, [1], 1, 6>; // Floating point and/or/xor logicals (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFLogicZ>; defm : BWWriteResPair<WriteFTest, [BWPort0], 1, [1], 1, 5>; // Floating point TEST instructions. defm : BWWriteResPair<WriteFTestY, [BWPort0], 1, [1], 1, 6>; // Floating point TEST instructions (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : BWWriteResPair<WriteFShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector shuffles. defm : BWWriteResPair<WriteFShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector shuffles (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : BWWriteResPair<WriteFVarShuffle, [BWPort5], 1, [1], 1, 5>; // Floating point vector variable shuffles. defm : BWWriteResPair<WriteFVarShuffleY, [BWPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles. +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : BWWriteResPair<WriteFBlend, [BWPort015], 1, [1], 1, 5>; // Floating point vector blends. defm : BWWriteResPair<WriteFBlendY, [BWPort015], 1, [1], 1, 6>; // Floating point vector blends. +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : BWWriteResPair<WriteFVarBlend, [BWPort5], 2, [2], 2, 5>; // Fp vector variable blends. defm : BWWriteResPair<WriteFVarBlendY, [BWPort5], 2, [2], 2, 6>; // Fp vector variable blends. +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; // FMA Scheduling helper class. 
// class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -291,31 +308,42 @@ defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156 defm : BWWriteResPair<WriteVecALU, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. defm : BWWriteResPair<WriteVecALUX, [BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. defm : BWWriteResPair<WriteVecALUY, [BWPort15], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : BWWriteResPair<WriteVecLogic, [BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor. defm : BWWriteResPair<WriteVecLogicX,[BWPort015], 1, [1], 1, 5>; // Vector integer and/or/xor. defm : BWWriteResPair<WriteVecLogicY,[BWPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : BWWriteResPair<WriteVecTest, [BWPort0,BWPort5], 2, [1,1], 2, 5>; // Vector integer TEST instructions. defm : BWWriteResPair<WriteVecTestY, [BWPort0,BWPort5], 4, [1,1], 2, 6>; // Vector integer TEST instructions (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : BWWriteResPair<WriteVecIMul, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply. defm : BWWriteResPair<WriteVecIMulX, [BWPort0], 5, [1], 1, 5>; // Vector integer multiply. defm : BWWriteResPair<WriteVecIMulY, [BWPort0], 5, [1], 1, 6>; // Vector integer multiply. +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; defm : BWWriteResPair<WritePMULLD, [BWPort0], 10, [2], 2, 5>; // Vector PMULLD. defm : BWWriteResPair<WritePMULLDY, [BWPort0], 10, [2], 2, 6>; // Vector PMULLD (YMM/ZMM). +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : BWWriteResPair<WriteShuffle, [BWPort5], 1, [1], 1, 5>; // Vector shuffles. defm : BWWriteResPair<WriteShuffleX, [BWPort5], 1, [1], 1, 5>; // Vector shuffles. defm : BWWriteResPair<WriteShuffleY, [BWPort5], 1, [1], 1, 6>; // Vector shuffles (YMM/ZMM). 
+defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : BWWriteResPair<WriteVarShuffle, [BWPort5], 1, [1], 1, 5>; // Vector variable shuffles. defm : BWWriteResPair<WriteVarShuffleX,[BWPort5], 1, [1], 1, 5>; // Vector variable shuffles. defm : BWWriteResPair<WriteVarShuffleY,[BWPort5], 1, [1], 1, 6>; // Vector variable shuffles (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : BWWriteResPair<WriteBlend, [BWPort5], 1, [1], 1, 5>; // Vector blends. defm : BWWriteResPair<WriteBlendY, [BWPort5], 1, [1], 1, 6>; // Vector blends (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : BWWriteResPair<WriteVarBlend, [BWPort5], 2, [2], 2, 5>; // Vector variable blends. defm : BWWriteResPair<WriteVarBlendY, [BWPort5], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : BWWriteResPair<WriteMPSAD, [BWPort0, BWPort5], 7, [1, 2], 3, 5>; // Vector MPSAD. defm : BWWriteResPair<WriteMPSADY, [BWPort0, BWPort5], 7, [1, 2], 3, 6>; // Vector MPSAD. +defm : X86WriteResPairUnsupported<WriteMPSADZ>; defm : BWWriteResPair<WritePSADBW, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW. defm : BWWriteResPair<WritePSADBWX, [BWPort0], 5, [1], 1, 5>; // Vector PSADBW. defm : BWWriteResPair<WritePSADBWY, [BWPort0], 5, [1], 1, 6>; // Vector PSADBW (YMM/ZMM). +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : BWWriteResPair<WritePHMINPOS, [BWPort0], 5>; // Vector PHMINPOS. // Vector integer shifts. 
@@ -323,12 +351,15 @@ defm : BWWriteResPair<WriteVecShift, [BWPort0], 1, [1], 1, 5>; defm : BWWriteResPair<WriteVecShiftX, [BWPort0,BWPort5], 2, [1,1], 2, 5>; defm : X86WriteRes<WriteVecShiftY, [BWPort0,BWPort5], 4, [1,1], 2>; defm : X86WriteRes<WriteVecShiftYLd, [BWPort0,BWPort23], 7, [1,1], 2>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; defm : BWWriteResPair<WriteVecShiftImm, [BWPort0], 1, [1], 1, 5>; defm : BWWriteResPair<WriteVecShiftImmX, [BWPort0], 1, [1], 1, 5>; // Vector integer immediate shifts (XMM). defm : BWWriteResPair<WriteVecShiftImmY, [BWPort0], 1, [1], 1, 6>; // Vector integer immediate shifts (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : BWWriteResPair<WriteVarVecShift, [BWPort0, BWPort5], 3, [2,1], 3, 5>; // Variable vector shifts. defm : BWWriteResPair<WriteVarVecShiftY, [BWPort0, BWPort5], 3, [2,1], 3, 6>; // Variable vector shifts (YMM/ZMM). +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; // Vector insert/extract operations. 
def : WriteRes<WriteVecInsert, [BWPort5]> { @@ -354,33 +385,43 @@ def : WriteRes<WriteVecExtractSt, [BWPort4,BWPort5,BWPort237]> { defm : BWWriteResPair<WriteCvtSS2I, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPS2I, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPS2IY, [BWPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : BWWriteResPair<WriteCvtSD2I, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPD2I, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPD2IY, [BWPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; defm : BWWriteResPair<WriteCvtI2SS, [BWPort1], 4>; defm : BWWriteResPair<WriteCvtI2PS, [BWPort1], 4>; defm : BWWriteResPair<WriteCvtI2PSY, [BWPort1], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : BWWriteResPair<WriteCvtI2SD, [BWPort1], 4>; defm : BWWriteResPair<WriteCvtI2PD, [BWPort1], 4>; defm : BWWriteResPair<WriteCvtI2PDY, [BWPort1], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : BWWriteResPair<WriteCvtSS2SD, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPS2PD, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPS2PDY, [BWPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; defm : BWWriteResPair<WriteCvtSD2SS, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPD2PS, [BWPort1], 3>; defm : BWWriteResPair<WriteCvtPD2PSY, [BWPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; defm : X86WriteRes<WriteCvtPH2PS, [BWPort0,BWPort5], 2, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSY, [BWPort0,BWPort5], 2, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZ>; defm : X86WriteRes<WriteCvtPH2PSLd, [BWPort0,BWPort23], 6, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSYLd, [BWPort0,BWPort23], 6, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>; defm : X86WriteRes<WriteCvtPS2PH, [BWPort1,BWPort5], 4, [1,1], 2>; defm : X86WriteRes<WriteCvtPS2PHY, [BWPort1,BWPort5], 6, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; defm : X86WriteRes<WriteCvtPS2PHSt, [BWPort1,BWPort4,BWPort237], 5, 
[1,1,1], 3>; defm : X86WriteRes<WriteCvtPS2PHYSt, [BWPort1,BWPort4,BWPort237], 7, [1,1,1], 3>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Strings instructions. diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 0001ffb755f..73273f0e9d1 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -185,25 +185,31 @@ defm : X86WriteRes<WriteEMMS, [HWPort01,HWPort15,HWPort015,HWPort0156], defm : HWWriteResPair<WriteFAdd, [HWPort1], 3, [1], 1, 5>; defm : HWWriteResPair<WriteFAddX, [HWPort1], 3, [1], 1, 6>; defm : HWWriteResPair<WriteFAddY, [HWPort1], 3, [1], 1, 7>; +defm : HWWriteResPair<WriteFAddZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFAdd64, [HWPort1], 3, [1], 1, 5>; defm : HWWriteResPair<WriteFAdd64X, [HWPort1], 3, [1], 1, 6>; defm : HWWriteResPair<WriteFAdd64Y, [HWPort1], 3, [1], 1, 7>; +defm : HWWriteResPair<WriteFAdd64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFCmp, [HWPort1], 3, [1], 1, 5>; defm : HWWriteResPair<WriteFCmpX, [HWPort1], 3, [1], 1, 6>; defm : HWWriteResPair<WriteFCmpY, [HWPort1], 3, [1], 1, 7>; +defm : HWWriteResPair<WriteFCmpZ, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFCmp64, [HWPort1], 3, [1], 1, 5>; defm : HWWriteResPair<WriteFCmp64X, [HWPort1], 3, [1], 1, 6>; defm : HWWriteResPair<WriteFCmp64Y, [HWPort1], 3, [1], 1, 7>; +defm : HWWriteResPair<WriteFCmp64Z, [HWPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFCom, [HWPort1], 3>; defm : HWWriteResPair<WriteFMul, [HWPort01], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFMulX, [HWPort01], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFMulY, [HWPort01], 5, [1], 1, 7>; +defm : HWWriteResPair<WriteFMulZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFMul64, [HWPort01], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFMul64X, [HWPort01], 5, [1], 1, 6>; defm : 
HWWriteResPair<WriteFMul64Y, [HWPort01], 5, [1], 1, 7>; +defm : HWWriteResPair<WriteFMul64Z, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFDiv, [HWPort0,HWFPDivider], 13, [1,7], 1, 5>; defm : HWWriteResPair<WriteFDivX, [HWPort0,HWFPDivider], 13, [1,7], 1, 6>; @@ -217,10 +223,12 @@ defm : HWWriteResPair<WriteFDiv64Z, [HWPort0,HWPort15,HWFPDivider], 35, [2,1,28] defm : HWWriteResPair<WriteFRcp, [HWPort0], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFRcpX, [HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFRcpY, [HWPort0,HWPort015], 11, [2,1], 3, 7>; +defm : HWWriteResPair<WriteFRcpZ, [HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFRsqrt, [HWPort0], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFRsqrtX,[HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFRsqrtY,[HWPort0,HWPort015], 11, [2,1], 3, 7>; +defm : HWWriteResPair<WriteFRsqrtZ,[HWPort0,HWPort015], 11, [2,1], 3, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFSqrt, [HWPort0,HWFPDivider], 11, [1,7], 1, 5>; defm : HWWriteResPair<WriteFSqrtX, [HWPort0,HWFPDivider], 11, [1,7], 1, 6>; @@ -235,60 +243,80 @@ defm : HWWriteResPair<WriteFSqrt80, [HWPort0,HWFPDivider], 23, [1,17]>; defm : HWWriteResPair<WriteFMA, [HWPort01], 5, [1], 1, 5>; defm : HWWriteResPair<WriteFMAX, [HWPort01], 5, [1], 1, 6>; defm : HWWriteResPair<WriteFMAY, [HWPort01], 5, [1], 1, 7>; +defm : HWWriteResPair<WriteFMAZ, [HWPort01], 5, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteDPPD, [HWPort0,HWPort1,HWPort5], 9, [1,1,1], 3, 6>; defm : HWWriteResPair<WriteDPPS, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 6>; defm : HWWriteResPair<WriteDPPSY, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; +defm : HWWriteResPair<WriteDPPSZ, [HWPort0,HWPort1,HWPort5], 14, [2,1,1], 4, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFSign, [HWPort0], 1>; defm : X86WriteRes<WriteFRnd, [HWPort23], 6, [1], 1>; defm : X86WriteRes<WriteFRndY, [HWPort23], 6, [1], 1>; +defm : 
X86WriteRes<WriteFRndZ, [HWPort23], 6, [1], 1>; // Unsupported = 1 defm : X86WriteRes<WriteFRndLd, [HWPort1,HWPort23], 12, [2,1], 3>; defm : X86WriteRes<WriteFRndYLd, [HWPort1,HWPort23], 13, [2,1], 3>; +defm : X86WriteRes<WriteFRndZLd, [HWPort1,HWPort23], 13, [2,1], 3>; // Unsupported = 1 defm : HWWriteResPair<WriteFLogic, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFLogicY, [HWPort5], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteFLogicZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFTest, [HWPort0], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFTestY, [HWPort0], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteFTestZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFShuffle, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFShuffleY, [HWPort5], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteFShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFVarShuffle, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFVarShuffleY, [HWPort5], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteFVarShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFBlend, [HWPort015], 1, [1], 1, 6>; defm : HWWriteResPair<WriteFBlendY, [HWPort015], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteFBlendZ, [HWPort015], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteFShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteFVarShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteFVarBlend, [HWPort5], 2, [2], 2, 6>; defm : HWWriteResPair<WriteFVarBlendY, [HWPort5], 2, [2], 2, 7>; +defm : HWWriteResPair<WriteFVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1 // Conversion between integer and float. 
defm : HWWriteResPair<WriteCvtSD2I, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPD2I, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPD2IY, [HWPort1], 3>; +defm : HWWriteResPair<WriteCvtPD2IZ, [HWPort1], 3>; // Unsupported = 1 defm : HWWriteResPair<WriteCvtSS2I, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPS2I, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPS2IY, [HWPort1], 3>; +defm : HWWriteResPair<WriteCvtPS2IZ, [HWPort1], 3>; // Unsupported = 1 defm : HWWriteResPair<WriteCvtI2SD, [HWPort1], 4>; defm : HWWriteResPair<WriteCvtI2PD, [HWPort1], 4>; defm : HWWriteResPair<WriteCvtI2PDY, [HWPort1], 4>; +defm : HWWriteResPair<WriteCvtI2PDZ, [HWPort1], 4>; // Unsupported = 1 defm : HWWriteResPair<WriteCvtI2SS, [HWPort1], 4>; defm : HWWriteResPair<WriteCvtI2PS, [HWPort1], 4>; defm : HWWriteResPair<WriteCvtI2PSY, [HWPort1], 4>; +defm : HWWriteResPair<WriteCvtI2PSZ, [HWPort1], 4>; // Unsupported = 1 defm : HWWriteResPair<WriteCvtSS2SD, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPS2PD, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPS2PDY, [HWPort1], 3>; +defm : HWWriteResPair<WriteCvtPS2PDZ, [HWPort1], 3>; // Unsupported = 1 defm : HWWriteResPair<WriteCvtSD2SS, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPD2PS, [HWPort1], 3>; defm : HWWriteResPair<WriteCvtPD2PSY, [HWPort1], 3>; +defm : HWWriteResPair<WriteCvtPD2PSZ, [HWPort1], 3>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPH2PS, [HWPort0,HWPort5], 2, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSY, [HWPort0,HWPort5], 2, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSZ, [HWPort0,HWPort5], 2, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPH2PSLd, [HWPort0,HWPort23], 6, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSYLd, [HWPort0,HWPort23], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSZLd, [HWPort0,HWPort23], 7, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPS2PH, [HWPort1,HWPort5], 4, [1,1], 2>; defm : X86WriteRes<WriteCvtPS2PHY, [HWPort1,HWPort5], 6, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PHZ, 
[HWPort1,HWPort5], 6, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPS2PHSt, [HWPort1,HWPort4,HWPort5,HWPort237], 5, [1,1,1,1], 4>; defm : X86WriteRes<WriteCvtPS2PHYSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>; +defm : X86WriteRes<WriteCvtPS2PHZSt, [HWPort1,HWPort4,HWPort5,HWPort237], 7, [1,1,1,1], 4>; // Unsupported = 1 // Vector integer operations. defm : X86WriteRes<WriteVecLoad, [HWPort23], 5, [1], 1>; @@ -314,46 +342,61 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [HWPort5], 1, [1], 1>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecLogicY,[HWPort015], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteVecLogicZ,[HWPort015], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteVecTest, [HWPort0,HWPort5], 2, [1,1], 2, 6>; defm : HWWriteResPair<WriteVecTestY, [HWPort0,HWPort5], 4, [1,1], 2, 7>; +defm : HWWriteResPair<WriteVecTestZ, [HWPort0,HWPort5], 4, [1,1], 2, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteVecALU, [HWPort15], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVecALUX, [HWPort15], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecALUY, [HWPort15], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteVecALUZ, [HWPort15], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteVecIMul, [HWPort0], 5, [1], 1, 5>; defm : HWWriteResPair<WriteVecIMulX, [HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WriteVecIMulY, [HWPort0], 5, [1], 1, 7>; +defm : HWWriteResPair<WriteVecIMulZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WritePMULLD, [HWPort0], 10, [2], 2, 6>; defm : HWWriteResPair<WritePMULLDY, [HWPort0], 10, [2], 2, 7>; +defm : HWWriteResPair<WritePMULLDZ, [HWPort0], 10, [2], 2, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteShuffle, [HWPort5], 1, [1], 1, 5>; defm : HWWriteResPair<WriteShuffleX, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteShuffleY, [HWPort5], 1, [1], 1, 7>; +defm : 
HWWriteResPair<WriteShuffleZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteVarShuffle, [HWPort5], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVarShuffleX,[HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVarShuffleY,[HWPort5], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteVarShuffleZ,[HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteBlend, [HWPort5], 1, [1], 1, 6>; defm : HWWriteResPair<WriteBlendY, [HWPort5], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteBlendZ, [HWPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteVarShuffle256, [HWPort5], 3, [1], 1, 7>; defm : HWWriteResPair<WriteVarBlend, [HWPort5], 2, [2], 2, 6>; defm : HWWriteResPair<WriteVarBlendY, [HWPort5], 2, [2], 2, 7>; +defm : HWWriteResPair<WriteVarBlendZ, [HWPort5], 2, [2], 2, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteMPSAD, [HWPort0, HWPort5], 7, [1, 2], 3, 6>; defm : HWWriteResPair<WriteMPSADY, [HWPort0, HWPort5], 7, [1, 2], 3, 7>; +defm : HWWriteResPair<WriteMPSADZ, [HWPort0, HWPort5], 7, [1, 2], 3, 7>; // Unsupported = 1 defm : HWWriteResPair<WritePSADBW, [HWPort0], 5, [1], 1, 5>; defm : HWWriteResPair<WritePSADBWX, [HWPort0], 5, [1], 1, 6>; defm : HWWriteResPair<WritePSADBWY, [HWPort0], 5, [1], 1, 7>; +defm : HWWriteResPair<WritePSADBWZ, [HWPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WritePHMINPOS, [HWPort0], 5, [1], 1, 6>; // Vector integer shifts. 
defm : HWWriteResPair<WriteVecShift, [HWPort0], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVecShiftX, [HWPort0,HWPort5], 2, [1,1], 2, 6>; defm : X86WriteRes<WriteVecShiftY, [HWPort0,HWPort5], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftZ, [HWPort0,HWPort5], 4, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteVecShiftYLd, [HWPort0,HWPort23], 8, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftZLd, [HWPort0,HWPort23], 8, [1,1], 2>; // Unsupported = 1 defm : HWWriteResPair<WriteVecShiftImm, [HWPort0], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVecShiftImmX, [HWPort0], 1, [1], 1, 6>; defm : HWWriteResPair<WriteVecShiftImmY, [HWPort0], 1, [1], 1, 7>; +defm : HWWriteResPair<WriteVecShiftImmZ, [HWPort0], 1, [1], 1, 7>; // Unsupported = 1 defm : HWWriteResPair<WriteVarVecShift, [HWPort0, HWPort5], 3, [2,1], 3, 6>; defm : HWWriteResPair<WriteVarVecShiftY, [HWPort0, HWPort5], 3, [2,1], 3, 7>; +defm : HWWriteResPair<WriteVarVecShiftZ, [HWPort0, HWPort5], 3, [2,1], 3, 7>; // Unsupported = 1 // Vector insert/extract operations. 
def : WriteRes<WriteVecInsert, [HWPort5]> { diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index a7795021b6a..396706e5f46 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -176,25 +176,31 @@ defm : X86WriteRes<WriteEMMS, [SBPort015], 31, [31], 31>; defm : SBWriteResPair<WriteFAdd, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFAddX, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFAddY, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteFAddZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFAdd64, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFAdd64X, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFAdd64Y, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteFAdd64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFCmp, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFCmpX, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFCmpY, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteFCmpZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFCmp64, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFCmp64X, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFCmp64Y, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteFCmp64Z, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFCom, [SBPort1], 3>; defm : SBWriteResPair<WriteFMul, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFMulX, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFMulY, [SBPort0], 5, [1], 1, 7>; +defm : SBWriteResPair<WriteFMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFMul64, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFMul64X, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFMul64Y, [SBPort0], 5, [1], 1, 7>; +defm : SBWriteResPair<WriteFMul64Z, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : 
SBWriteResPair<WriteFDiv, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; defm : SBWriteResPair<WriteFDivX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; @@ -208,10 +214,12 @@ defm : SBWriteResPair<WriteFDiv64Z, [SBPort0,SBPort05,SBFPDivider], 45, [2,1,44] defm : SBWriteResPair<WriteFRcp, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRcpX, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRcpY, [SBPort0,SBPort05], 7, [2,1], 3, 7>; +defm : SBWriteResPair<WriteFRcpZ, [SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFRsqrt, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRsqrtX,[SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteFRsqrtY,[SBPort0,SBPort05], 7, [2,1], 3, 7>; +defm : SBWriteResPair<WriteFRsqrtZ,[SBPort0,SBPort05], 7, [2,1], 3, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFSqrt, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; defm : SBWriteResPair<WriteFSqrtX, [SBPort0,SBFPDivider], 14, [1,14], 1, 6>; @@ -226,58 +234,78 @@ defm : SBWriteResPair<WriteFSqrt80, [SBPort0,SBFPDivider], 24, [1,24], 1, 6>; defm : SBWriteResPair<WriteDPPD, [SBPort0,SBPort1,SBPort5], 9, [1,1,1], 3, 6>; defm : SBWriteResPair<WriteDPPS, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 6>; defm : SBWriteResPair<WriteDPPSY, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; +defm : SBWriteResPair<WriteDPPSZ, [SBPort0,SBPort1,SBPort5], 12, [1,2,1], 4, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFSign, [SBPort5], 1>; defm : SBWriteResPair<WriteFRnd, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteFRndY, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteFRndZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFLogic, [SBPort5], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFLogicY, [SBPort5], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteFLogicZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFTest, [SBPort0], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFTestY, [SBPort0], 1, [1], 1, 7>; +defm : 
SBWriteResPair<WriteFTestZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFShuffle, [SBPort5], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFShuffleY,[SBPort5], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteFShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFVarShuffle, [SBPort5], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFVarShuffleY,[SBPort5], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteFVarShuffleZ,[SBPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFBlend, [SBPort05], 1, [1], 1, 6>; defm : SBWriteResPair<WriteFBlendY, [SBPort05], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteFBlendZ, [SBPort05], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteFVarBlend, [SBPort05], 2, [2], 2, 6>; defm : SBWriteResPair<WriteFVarBlendY,[SBPort05], 2, [2], 2, 7>; +defm : SBWriteResPair<WriteFVarBlendZ,[SBPort05], 2, [2], 2, 7>; // Unsupported = 1 // Conversion between integer and float. defm : SBWriteResPair<WriteCvtSS2I, [SBPort0,SBPort1], 5, [1,1], 2>; defm : SBWriteResPair<WriteCvtPS2I, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteCvtPS2IY, [SBPort1], 3, [1], 1, 7>; +defm : SBWriteResPair<WriteCvtPS2IZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteCvtSD2I, [SBPort0,SBPort1], 5, [1,1], 2>; defm : SBWriteResPair<WriteCvtPD2I, [SBPort1,SBPort5], 4, [1,1], 2, 6>; defm : X86WriteRes<WriteCvtPD2IY, [SBPort1,SBPort5], 4, [1,1], 2>; +defm : X86WriteRes<WriteCvtPD2IZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPD2IYLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; +defm : X86WriteRes<WriteCvtPD2IZLd, [SBPort1,SBPort5,SBPort23], 11, [1,1,1], 3>; // Unsupported = 1 defm : X86WriteRes<WriteCvtI2SS, [SBPort1,SBPort5], 5, [1,2], 3>; defm : X86WriteRes<WriteCvtI2SSLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; defm : SBWriteResPair<WriteCvtI2PS, [SBPort1], 3, [1], 1, 6>; defm : SBWriteResPair<WriteCvtI2PSY, [SBPort1], 3, [1], 1, 7>; +defm : 
SBWriteResPair<WriteCvtI2PSZ, [SBPort1], 3, [1], 1, 7>; // Unsupported = 1 defm : X86WriteRes<WriteCvtI2SD, [SBPort1,SBPort5], 4, [1,1], 2>; defm : X86WriteRes<WriteCvtI2PD, [SBPort1,SBPort5], 4, [1,1], 2>; defm : X86WriteRes<WriteCvtI2PDY, [SBPort1,SBPort5], 4, [1,1], 2>; +defm : X86WriteRes<WriteCvtI2PDZ, [SBPort1,SBPort5], 4, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtI2SDLd, [SBPort1,SBPort23], 9, [1,1], 2>; defm : X86WriteRes<WriteCvtI2PDLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; defm : X86WriteRes<WriteCvtI2PDYLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; +defm : X86WriteRes<WriteCvtI2PDZLd, [SBPort1,SBPort5,SBPort23], 10, [1,1,1], 3>; // Unsupported = 1 defm : SBWriteResPair<WriteCvtSS2SD, [SBPort0], 1, [1], 1, 6>; defm : X86WriteRes<WriteCvtPS2PD, [SBPort0,SBPort5], 2, [1,1], 2>; defm : X86WriteRes<WriteCvtPS2PDY, [SBPort0,SBPort5], 2, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PDZ, [SBPort0,SBPort5], 2, [1,1], 2>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPS2PDLd, [SBPort0,SBPort23], 7, [1,1], 2>; defm : X86WriteRes<WriteCvtPS2PDYLd, [SBPort0,SBPort23], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PDZLd, [SBPort0,SBPort23], 7, [1,1], 2>; // Unsupported = 1 defm : SBWriteResPair<WriteCvtSD2SS, [SBPort1,SBPort5], 4, [1,1], 2, 6>; defm : SBWriteResPair<WriteCvtPD2PS, [SBPort1,SBPort5], 4, [1,1], 2, 6>; defm : SBWriteResPair<WriteCvtPD2PSY, [SBPort1,SBPort5], 4, [1,1], 2, 7>; +defm : SBWriteResPair<WriteCvtPD2PSZ, [SBPort1,SBPort5], 4, [1,1], 2, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteCvtPH2PS, [SBPort1], 3>; defm : SBWriteResPair<WriteCvtPH2PSY, [SBPort1], 3>; +defm : SBWriteResPair<WriteCvtPH2PSZ, [SBPort1], 3>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPS2PH, [SBPort1], 3, [1], 1>; defm : X86WriteRes<WriteCvtPS2PHY, [SBPort1], 3, [1], 1>; +defm : X86WriteRes<WriteCvtPS2PHZ, [SBPort1], 3, [1], 1>; // Unsupported = 1 defm : X86WriteRes<WriteCvtPS2PHSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; defm : 
X86WriteRes<WriteCvtPS2PHYSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; +defm : X86WriteRes<WriteCvtPS2PHZSt, [SBPort1, SBPort23, SBPort4], 4, [1,1,1], 1>; // Unsupported = 1 // Vector integer operations. defm : X86WriteRes<WriteVecLoad, [SBPort23], 5, [1], 1>; @@ -303,42 +331,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>; defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecLogicY,[SBPort015], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVecLogicZ,[SBPort015], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVecTest, [SBPort0,SBPort5], 2, [1,1], 2, 6>; defm : SBWriteResPair<WriteVecTestY, [SBPort0,SBPort5], 2, [1,1], 2, 7>; +defm : SBWriteResPair<WriteVecTestZ, [SBPort0,SBPort5], 2, [1,1], 2, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVecALU, [SBPort1], 3, [1], 1, 5>; defm : SBWriteResPair<WriteVecALUX, [SBPort15], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecALUY, [SBPort15], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVecALUZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVecIMul, [SBPort0], 5, [1], 1, 5>; defm : SBWriteResPair<WriteVecIMulX, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WriteVecIMulY, [SBPort0], 5, [1], 1, 7>; +defm : SBWriteResPair<WriteVecIMulZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WritePMULLD, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WritePMULLDY, [SBPort0], 5, [1], 1, 7>; // TODO this is probably wrong for 256/512-bit for the "generic" model +defm : SBWriteResPair<WritePMULLDZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteShuffle, [SBPort5], 1, [1], 1, 5>; defm : SBWriteResPair<WriteShuffleX, [SBPort15], 1, [1], 1, 6>; defm : SBWriteResPair<WriteShuffleY, [SBPort5], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteShuffleZ, [SBPort5], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVarShuffle, 
[SBPort15], 1, [1], 1, 5>; defm : SBWriteResPair<WriteVarShuffleX, [SBPort15], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVarShuffleY, [SBPort15], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVarShuffleZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteBlend, [SBPort15], 1, [1], 1, 6>; defm : SBWriteResPair<WriteBlendY, [SBPort15], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteBlendZ, [SBPort15], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVarBlend, [SBPort15], 2, [2], 2, 6>; defm : SBWriteResPair<WriteVarBlendY,[SBPort15], 2, [2], 2, 7>; +defm : SBWriteResPair<WriteVarBlendZ,[SBPort15], 2, [2], 2, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteMPSAD, [SBPort0, SBPort15], 7, [1,2], 3, 6>; defm : SBWriteResPair<WriteMPSADY, [SBPort0, SBPort15], 7, [1,2], 3, 7>; +defm : SBWriteResPair<WriteMPSADZ, [SBPort0, SBPort15], 7, [1,2], 3, 7>; // Unsupported = 1 defm : SBWriteResPair<WritePSADBW, [SBPort0], 5, [1], 1, 5>; defm : SBWriteResPair<WritePSADBWX, [SBPort0], 5, [1], 1, 6>; defm : SBWriteResPair<WritePSADBWY, [SBPort0], 5, [1], 1, 7>; +defm : SBWriteResPair<WritePSADBWZ, [SBPort0], 5, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WritePHMINPOS, [SBPort0], 5, [1], 1, 6>; // Vector integer shifts. 
defm : SBWriteResPair<WriteVecShift, [SBPort5], 1, [1], 1, 5>; defm : SBWriteResPair<WriteVecShiftX, [SBPort0,SBPort15], 2, [1,1], 2, 6>; defm : SBWriteResPair<WriteVecShiftY, [SBPort0,SBPort15], 4, [1,1], 2, 7>; +defm : SBWriteResPair<WriteVecShiftZ, [SBPort0,SBPort15], 4, [1,1], 2, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVecShiftImm, [SBPort5], 1, [1], 1, 5>; defm : SBWriteResPair<WriteVecShiftImmX, [SBPort0], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVecShiftImmY, [SBPort0], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVecShiftImmZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1 defm : SBWriteResPair<WriteVarVecShift, [SBPort0], 1, [1], 1, 6>; defm : SBWriteResPair<WriteVarVecShiftY, [SBPort0], 1, [1], 1, 7>; +defm : SBWriteResPair<WriteVarVecShiftZ, [SBPort0], 1, [1], 1, 7>; // Unsupported = 1 // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [SBPort5,SBPort15]> { @@ -365,9 +407,11 @@ def : WriteRes<WriteVecExtractSt, [SBPort4,SBPort23,SBPort15]> { defm : SBWriteResPair<WriteFHAdd, [SBPort1,SBPort5], 5, [1,2], 3, 6>; defm : SBWriteResPair<WriteFHAddY, [SBPort1,SBPort5], 5, [1,2], 3, 7>; +defm : SBWriteResPair<WriteFHAddZ, [SBPort1,SBPort5], 5, [1,2], 3, 7>; // Unsupported = 1 defm : SBWriteResPair<WritePHAdd, [SBPort15], 3, [3], 3, 5>; defm : SBWriteResPair<WritePHAddX, [SBPort15], 3, [3], 3, 6>; defm : SBWriteResPair<WritePHAddY, [SBPort15], 3, [3], 3, 7>; +defm : SBWriteResPair<WritePHAddZ, [SBPort15], 3, [3], 3, 7>; // Unsupported = 1 //////////////////////////////////////////////////////////////////////////////// // String instructions. @@ -484,6 +528,7 @@ defm : SBWriteResPair<WriteVarShuffle256, [SBPort5], 1, [1], 1, 7>; defm : SBWriteResPair<WriteFMA, [SBPort01], 5>; defm : SBWriteResPair<WriteFMAX, [SBPort01], 5>; defm : SBWriteResPair<WriteFMAY, [SBPort01], 5>; +defm : SBWriteResPair<WriteFMAZ, [SBPort01], 5>; // Unsupported = 1 // Remaining SNB instrs. 
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 761b9b8c975..952db050799 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -183,76 +183,93 @@ defm : X86WriteRes<WriteFMoveY, [SKLPort015], 1, [1], 1>; defm : X86WriteRes<WriteEMMS, [SKLPort05,SKLPort0156], 10, [9,1], 10>; defm : SKLWriteResPair<WriteFAdd, [SKLPort01], 4, [1], 1, 5>; // Floating point add/sub. -defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; // Floating point add/sub (XMM). -defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM). +defm : SKLWriteResPair<WriteFAddX, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFAddY, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : SKLWriteResPair<WriteFAdd64, [SKLPort01], 4, [1], 1, 5>; // Floating point double add/sub. -defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double add/sub (XMM). -defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM). +defm : SKLWriteResPair<WriteFAdd64X, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFAdd64Y, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : SKLWriteResPair<WriteFCmp, [SKLPort01], 4, [1], 1, 5>; // Floating point compare. -defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>; // Floating point compare (XMM). -defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM). +defm : SKLWriteResPair<WriteFCmpX, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFCmpY, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : SKLWriteResPair<WriteFCmp64, [SKLPort01], 4, [1], 1, 5>; // Floating point double compare. 
-defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double compare (XMM). -defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM). +defm : SKLWriteResPair<WriteFCmp64X, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFCmp64Y, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : SKLWriteResPair<WriteFCom, [SKLPort0], 2>; // Floating point compare to flags. defm : SKLWriteResPair<WriteFMul, [SKLPort01], 4, [1], 1, 5>; // Floating point multiplication. -defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>; // Floating point multiplication (XMM). -defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM). +defm : SKLWriteResPair<WriteFMulX, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFMulY, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : SKLWriteResPair<WriteFMul64, [SKLPort01], 4, [1], 1, 5>; // Floating point double multiplication. -defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>; // Floating point double multiplication (XMM). -defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM). +defm : SKLWriteResPair<WriteFMul64X, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFMul64Y, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : SKLWriteResPair<WriteFDiv, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 5>; // Floating point division. -//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; // Floating point division (XMM). -defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; // Floating point division (YMM). 
+//defm : SKLWriteResPair<WriteFDivX, [SKLPort0,SKLFPDivider], 11, [1,3], 1, 6>; +defm : SKLWriteResPair<WriteFDivY, [SKLPort0,SKLFPDivider], 11, [1,5], 1, 7>; defm : X86WriteResPairUnsupported<WriteFDivZ>; //defm : SKLWriteResPair<WriteFDiv64, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 5>; // Floating point double division. -//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; // Floating point double division (XMM). -//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; // Floating point double division (YMM). +//defm : SKLWriteResPair<WriteFDiv64X, [SKLPort0,SKLFPDivider], 14, [1,3], 1, 6>; +//defm : SKLWriteResPair<WriteFDiv64Y, [SKLPort0,SKLFPDivider], 14, [1,5], 1, 7>; defm : X86WriteResPairUnsupported<WriteFDiv64Z>; defm : SKLWriteResPair<WriteFSqrt, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 5>; // Floating point square root. -defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM). -defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM). +defm : SKLWriteResPair<WriteFSqrtX, [SKLPort0,SKLFPDivider], 12, [1,3], 1, 6>; +defm : SKLWriteResPair<WriteFSqrtY, [SKLPort0,SKLFPDivider], 12, [1,6], 1, 7>; defm : X86WriteResPairUnsupported<WriteFSqrtZ>; defm : SKLWriteResPair<WriteFSqrt64, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 5>; // Floating point double square root. -defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM). -defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM). 
+defm : SKLWriteResPair<WriteFSqrt64X, [SKLPort0,SKLFPDivider], 18, [1,6], 1, 6>; +defm : SKLWriteResPair<WriteFSqrt64Y, [SKLPort0,SKLFPDivider], 18, [1,12],1, 7>; defm : X86WriteResPairUnsupported<WriteFSqrt64Z>; defm : SKLWriteResPair<WriteFSqrt80, [SKLPort0,SKLFPDivider], 21, [1,7]>; // Floating point long double square root. defm : SKLWriteResPair<WriteFRcp, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate. -defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM). -defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKLWriteResPair<WriteFRcpX, [SKLPort0], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFRcpY, [SKLPort0], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; defm : SKLWriteResPair<WriteFRsqrt, [SKLPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate. -defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM). -defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM). +defm : SKLWriteResPair<WriteFRsqrtX,[SKLPort0], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFRsqrtY,[SKLPort0], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : SKLWriteResPair<WriteFMA, [SKLPort01], 4, [1], 1, 5>; // Fused Multiply Add. -defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>; // Fused Multiply Add (XMM). -defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). +defm : SKLWriteResPair<WriteFMAX, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteFMAY, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : SKLWriteResPair<WriteDPPD, [SKLPort5,SKLPort01], 9, [1,2], 3, 6>; // Floating point double dot product. 
-defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; // Floating point single dot product. -defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; // Floating point single dot product (YMM). +defm : SKLWriteResPair<WriteDPPS, [SKLPort5,SKLPort01], 13, [1,3], 4, 6>; +defm : SKLWriteResPair<WriteDPPSY, [SKLPort5,SKLPort01], 13, [1,3], 4, 7>; +defm : X86WriteResPairUnsupported<WriteDPPSZ>; defm : SKLWriteResPair<WriteFSign, [SKLPort0], 1>; // Floating point fabs/fchs. defm : SKLWriteResPair<WriteFRnd, [SKLPort01], 8, [2], 2, 6>; // Floating point rounding. -defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM). +defm : SKLWriteResPair<WriteFRndY, [SKLPort01], 8, [2], 2, 7>; +defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : SKLWriteResPair<WriteFLogic, [SKLPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. -defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). +defm : SKLWriteResPair<WriteFLogicY, [SKLPort015], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFLogicZ>; defm : SKLWriteResPair<WriteFTest, [SKLPort0], 2, [1], 1, 6>; // Floating point TEST instructions. -defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM). +defm : SKLWriteResPair<WriteFTestY, [SKLPort0], 2, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : SKLWriteResPair<WriteFShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles. -defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM). +defm : SKLWriteResPair<WriteFShuffleY, [SKLPort5], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : SKLWriteResPair<WriteFVarShuffle, [SKLPort5], 1, [1], 1, 6>; // Floating point vector shuffles. 
-defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Floating point vector shuffles. +defm : SKLWriteResPair<WriteFVarShuffleY, [SKLPort5], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : SKLWriteResPair<WriteFBlend, [SKLPort015], 1, [1], 1, 6>; // Floating point vector blends. -defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; // Floating point vector blends. +defm : SKLWriteResPair<WriteFBlendY, [SKLPort015], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : SKLWriteResPair<WriteFVarBlend, [SKLPort015], 2, [2], 2, 6>; // Fp vector variable blends. -defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; // Fp vector variable blends. +defm : SKLWriteResPair<WriteFVarBlendY,[SKLPort015], 2, [2], 2, 7>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -279,33 +296,44 @@ defm : X86WriteRes<WriteVecMoveToGpr, [SKLPort0], 2, [1], 1>; defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>; defm : SKLWriteResPair<WriteVecALU, [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. -defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM). -defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; +defm : SKLWriteResPair<WriteVecALUY, [SKLPort01], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : SKLWriteResPair<WriteVecLogic, [SKLPort05], 1, [1], 1, 5>; // Vector integer and/or/xor. -defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM). -defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). 
+defm : SKLWriteResPair<WriteVecLogicX,[SKLPort015], 1, [1], 1, 6>; +defm : SKLWriteResPair<WriteVecLogicY,[SKLPort015], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : SKLWriteResPair<WriteVecTest, [SKLPort0,SKLPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions. -defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM). +defm : SKLWriteResPair<WriteVecTestY, [SKLPort0,SKLPort5], 3, [1,1], 2, 7>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : SKLWriteResPair<WriteVecIMul, [SKLPort0] , 4, [1], 1, 5>; // Vector integer multiply. -defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>; // Vector integer multiply (XMM). -defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM). +defm : SKLWriteResPair<WriteVecIMulX, [SKLPort01], 4, [1], 1, 6>; +defm : SKLWriteResPair<WriteVecIMulY, [SKLPort01], 4, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; defm : SKLWriteResPair<WritePMULLD, [SKLPort01], 10, [2], 2, 6>; // Vector PMULLD. -defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM). +defm : SKLWriteResPair<WritePMULLDY, [SKLPort01], 10, [2], 2, 7>; +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : SKLWriteResPair<WriteShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles. -defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM). -defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM). +defm : SKLWriteResPair<WriteShuffleX, [SKLPort5], 1, [1], 1, 6>; +defm : SKLWriteResPair<WriteShuffleY, [SKLPort5], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : SKLWriteResPair<WriteVarShuffle, [SKLPort5], 1, [1], 1, 5>; // Vector shuffles. -defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; // Vector shuffles (XMM). 
-defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM). +defm : SKLWriteResPair<WriteVarShuffleX, [SKLPort5], 1, [1], 1, 6>; +defm : SKLWriteResPair<WriteVarShuffleY, [SKLPort5], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : SKLWriteResPair<WriteBlend, [SKLPort5], 1, [1], 1, 6>; // Vector blends. -defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM). +defm : SKLWriteResPair<WriteBlendY, [SKLPort5], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : SKLWriteResPair<WriteVarBlend, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends. -defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). +defm : SKLWriteResPair<WriteVarBlendY, [SKLPort015], 2, [2], 2, 6>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : SKLWriteResPair<WriteMPSAD, [SKLPort5], 4, [2], 2, 6>; // Vector MPSAD. -defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; // Vector MPSAD (YMM/ZMM). +defm : SKLWriteResPair<WriteMPSADY, [SKLPort5], 4, [2], 2, 7>; +defm : X86WriteResPairUnsupported<WriteMPSADZ>; defm : SKLWriteResPair<WritePSADBW, [SKLPort5], 3, [1], 1, 5>; // Vector PSADBW. -defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; // Vector PSADBW (XMM). -defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; // Vector PSADBW (YMM/ZMM). +defm : SKLWriteResPair<WritePSADBWX, [SKLPort5], 3, [1], 1, 6>; +defm : SKLWriteResPair<WritePSADBWY, [SKLPort5], 3, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : SKLWriteResPair<WritePHMINPOS, [SKLPort01], 4, [1], 1, 6>; // Vector PHMINPOS. // Vector integer shifts. 
@@ -314,12 +342,15 @@ defm : X86WriteRes<WriteVecShiftX, [SKLPort5,SKLPort01], 2, [1,1], 2>; defm : X86WriteRes<WriteVecShiftY, [SKLPort5,SKLPort01], 4, [1,1], 2>; defm : X86WriteRes<WriteVecShiftXLd, [SKLPort01,SKLPort23], 7, [1,1], 2>; defm : X86WriteRes<WriteVecShiftYLd, [SKLPort01,SKLPort23], 8, [1,1], 2>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; -defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>; -defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM). -defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM). +defm : SKLWriteResPair<WriteVecShiftImm, [SKLPort0], 1, [1], 1, 5>; // Vector integer immediate shifts. +defm : SKLWriteResPair<WriteVecShiftImmX, [SKLPort01], 1, [1], 1, 6>; +defm : SKLWriteResPair<WriteVecShiftImmY, [SKLPort01], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : SKLWriteResPair<WriteVarVecShift, [SKLPort01], 1, [1], 1, 6>; // Variable vector shifts. -defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM). +defm : SKLWriteResPair<WriteVarVecShiftY, [SKLPort01], 1, [1], 1, 7>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; // Vector insert/extract operations. 
def : WriteRes<WriteVecInsert, [SKLPort5]> { @@ -346,33 +377,43 @@ def : WriteRes<WriteVecExtractSt, [SKLPort4,SKLPort5,SKLPort237]> { defm : SKLWriteResPair<WriteCvtSS2I, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPS2I, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPS2IY, [SKLPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : SKLWriteResPair<WriteCvtSD2I, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPD2I, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPD2IY, [SKLPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; defm : SKLWriteResPair<WriteCvtI2SS, [SKLPort1], 4>; defm : SKLWriteResPair<WriteCvtI2PS, [SKLPort1], 4>; defm : SKLWriteResPair<WriteCvtI2PSY, [SKLPort1], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : SKLWriteResPair<WriteCvtI2SD, [SKLPort1], 4>; defm : SKLWriteResPair<WriteCvtI2PD, [SKLPort1], 4>; defm : SKLWriteResPair<WriteCvtI2PDY, [SKLPort1], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : SKLWriteResPair<WriteCvtSS2SD, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPS2PD, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPS2PDY, [SKLPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; defm : SKLWriteResPair<WriteCvtSD2SS, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPD2PS, [SKLPort1], 3>; defm : SKLWriteResPair<WriteCvtPD2PSY, [SKLPort1], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; defm : X86WriteRes<WriteCvtPH2PS, [SKLPort5,SKLPort015], 5, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSY, [SKLPort5,SKLPort01], 7, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZ>; defm : X86WriteRes<WriteCvtPH2PSLd, [SKLPort23,SKLPort01], 9, [1,1], 2>; defm : X86WriteRes<WriteCvtPH2PSYLd, [SKLPort23,SKLPort01], 10, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>; defm : X86WriteRes<WriteCvtPS2PH, [SKLPort5,SKLPort015], 5, [1,1], 2>; defm : X86WriteRes<WriteCvtPS2PHY, [SKLPort5,SKLPort01], 7, [1,1], 2>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; defm : 
X86WriteRes<WriteCvtPS2PHSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 6, [1,1,1,1], 4>; defm : X86WriteRes<WriteCvtPS2PHYSt, [SKLPort4,SKLPort5,SKLPort237,SKLPort01], 8, [1,1,1,1], 4>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // Strings instructions. diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 8ee58e78109..9080d847a7d 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -182,77 +182,94 @@ defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>; defm : X86WriteRes<WriteFMoveY, [SKXPort015], 1, [1], 1>; defm : X86WriteRes<WriteEMMS, [SKXPort05,SKXPort0156], 10, [9,1], 10>; -defm : SKXWriteResPair<WriteFAdd, [SKXPort015], 4, [1], 1, 5>; // Floating point add/sub. -defm : SKXWriteResPair<WriteFAddX, [SKXPort015], 4, [1], 1, 6>; // Floating point add/sub (XMM). -defm : SKXWriteResPair<WriteFAddY, [SKXPort015], 4, [1], 1, 7>; // Floating point add/sub (YMM/ZMM). -defm : SKXWriteResPair<WriteFAdd64, [SKXPort015], 4, [1], 1, 5>; // Floating point double add/sub. -defm : SKXWriteResPair<WriteFAdd64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double add/sub (XMM). -defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double add/sub (YMM/ZMM). - -defm : SKXWriteResPair<WriteFCmp, [SKXPort015], 4, [1], 1, 5>; // Floating point compare. -defm : SKXWriteResPair<WriteFCmpX, [SKXPort015], 4, [1], 1, 6>; // Floating point compare (XMM). -defm : SKXWriteResPair<WriteFCmpY, [SKXPort015], 4, [1], 1, 7>; // Floating point compare (YMM/ZMM). -defm : SKXWriteResPair<WriteFCmp64, [SKXPort015], 4, [1], 1, 5>; // Floating point double compare. -defm : SKXWriteResPair<WriteFCmp64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double compare (XMM). -defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double compare (YMM/ZMM). 
+defm : SKXWriteResPair<WriteFAdd, [SKXPort01], 4, [1], 1, 5>; // Floating point add/sub. +defm : SKXWriteResPair<WriteFAddX, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFAddY, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFAddZ, [SKXPort05], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFAdd64, [SKXPort01], 4, [1], 1, 5>; // Floating point double add/sub. +defm : SKXWriteResPair<WriteFAdd64X, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFAdd64Y, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFAdd64Z, [SKXPort05], 4, [1], 1, 7>; + +defm : SKXWriteResPair<WriteFCmp, [SKXPort01], 4, [1], 1, 5>; // Floating point compare. +defm : SKXWriteResPair<WriteFCmpX, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFCmpY, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFCmpZ, [SKXPort05], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFCmp64, [SKXPort01], 4, [1], 1, 5>; // Floating point double compare. +defm : SKXWriteResPair<WriteFCmp64X, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFCmp64Y, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFCmp64Z, [SKXPort05], 4, [1], 1, 7>; defm : SKXWriteResPair<WriteFCom, [SKXPort0], 2>; // Floating point compare to flags. -defm : SKXWriteResPair<WriteFMul, [SKXPort015], 4, [1], 1, 5>; // Floating point multiplication. -defm : SKXWriteResPair<WriteFMulX, [SKXPort015], 4, [1], 1, 6>; // Floating point multiplication (XMM). -defm : SKXWriteResPair<WriteFMulY, [SKXPort015], 4, [1], 1, 7>; // Floating point multiplication (YMM/ZMM). -defm : SKXWriteResPair<WriteFMul64, [SKXPort015], 4, [1], 1, 5>; // Floating point double multiplication. -defm : SKXWriteResPair<WriteFMul64X, [SKXPort015], 4, [1], 1, 6>; // Floating point double multiplication (XMM). -defm : SKXWriteResPair<WriteFMul64Y, [SKXPort015], 4, [1], 1, 7>; // Floating point double multiplication (YMM/ZMM). +defm : SKXWriteResPair<WriteFMul, [SKXPort01], 4, [1], 1, 5>; // Floating point multiplication. 
+defm : SKXWriteResPair<WriteFMulX, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFMulY, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFMulZ, [SKXPort05], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFMul64, [SKXPort01], 4, [1], 1, 5>; // Floating point double multiplication. +defm : SKXWriteResPair<WriteFMul64X, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFMul64Y, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFMul64Z, [SKXPort05], 4, [1], 1, 7>; defm : SKXWriteResPair<WriteFDiv, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 5>; // 10-14 cycles. // Floating point division. -//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM). -defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM). -defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. // Floating point division (ZMM). +//defm : SKXWriteResPair<WriteFDivX, [SKXPort0,SKXFPDivider], 11, [1,3], 1, 6>; // 10-14 cycles. +defm : SKXWriteResPair<WriteFDivY, [SKXPort0,SKXFPDivider], 11, [1,5], 1, 7>; // 10-14 cycles. +defm : SKXWriteResPair<WriteFDivZ, [SKXPort0,SKXPort5,SKXFPDivider], 18, [2,1,10], 3, 7>; // 10-14 cycles. //defm : SKXWriteResPair<WriteFDiv64, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 5>; // 10-14 cycles. // Floating point division. -//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. // Floating point division (XMM). -//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. // Floating point division (YMM). -defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. // Floating point division (ZMM). +//defm : SKXWriteResPair<WriteFDiv64X, [SKXPort0,SKXFPDivider], 14, [1,3], 1, 6>; // 10-14 cycles. 
+//defm : SKXWriteResPair<WriteFDiv64Y, [SKXPort0,SKXFPDivider], 14, [1,5], 1, 7>; // 10-14 cycles. +defm : SKXWriteResPair<WriteFDiv64Z, [SKXPort0,SKXPort5,SKXFPDivider], 23, [2,1,16], 3, 7>; // 10-14 cycles. defm : SKXWriteResPair<WriteFSqrt, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 5>; // Floating point square root. -defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; // Floating point square root (XMM). -defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; // Floating point square root (YMM). -defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; // Floating point square root (ZMM). +defm : SKXWriteResPair<WriteFSqrtX, [SKXPort0,SKXFPDivider], 12, [1,3], 1, 6>; +defm : SKXWriteResPair<WriteFSqrtY, [SKXPort0,SKXFPDivider], 12, [1,6], 1, 7>; +defm : SKXWriteResPair<WriteFSqrtZ, [SKXPort0,SKXPort5,SKXFPDivider], 20, [2,1,12], 3, 7>; defm : SKXWriteResPair<WriteFSqrt64, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 5>; // Floating point double square root. -defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; // Floating point double square root (XMM). -defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; // Floating point double square root (YMM). -defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; // Floating point double square root (ZMM). +defm : SKXWriteResPair<WriteFSqrt64X, [SKXPort0,SKXFPDivider], 18, [1,6], 1, 6>; +defm : SKXWriteResPair<WriteFSqrt64Y, [SKXPort0,SKXFPDivider], 18, [1,12],1, 7>; +defm : SKXWriteResPair<WriteFSqrt64Z, [SKXPort0,SKXPort5,SKXFPDivider], 32, [2,1,24], 3, 7>; defm : SKXWriteResPair<WriteFSqrt80, [SKXPort0,SKXFPDivider], 21, [1,7]>; // Floating point long double square root. defm : SKXWriteResPair<WriteFRcp, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal estimate. 
-defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal estimate (XMM). -defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal estimate (YMM/ZMM). +defm : SKXWriteResPair<WriteFRcpX, [SKXPort0], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFRcpY, [SKXPort0], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFRcpZ, [SKXPort0,SKXPort5], 4, [2,1], 3, 7>; defm : SKXWriteResPair<WriteFRsqrt, [SKXPort0], 4, [1], 1, 5>; // Floating point reciprocal square root estimate. -defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>; // Floating point reciprocal square root estimate (XMM). -defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; // Floating point reciprocal square root estimate (YMM/ZMM). - -defm : SKXWriteResPair<WriteFMA, [SKXPort015], 4, [1], 1, 5>; // Fused Multiply Add. -defm : SKXWriteResPair<WriteFMAX, [SKXPort015], 4, [1], 1, 6>; // Fused Multiply Add (XMM). -defm : SKXWriteResPair<WriteFMAY, [SKXPort015], 4, [1], 1, 7>; // Fused Multiply Add (YMM/ZMM). +defm : SKXWriteResPair<WriteFRsqrtX,[SKXPort0], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFRsqrtY,[SKXPort0], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFRsqrtZ,[SKXPort0,SKXPort5], 9, [2,1], 3, 7>; + +defm : SKXWriteResPair<WriteFMA, [SKXPort01], 4, [1], 1, 5>; // Fused Multiply Add. +defm : SKXWriteResPair<WriteFMAX, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteFMAY, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteFMAZ, [SKXPort05], 4, [1], 1, 7>; defm : SKXWriteResPair<WriteDPPD, [SKXPort5,SKXPort015], 9, [1,2], 3, 6>; // Floating point double dot product. -defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; // Floating point single dot product. -defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; // Floating point single dot product (YMM). 
+defm : SKXWriteResPair<WriteDPPS, [SKXPort5,SKXPort015], 13, [1,3], 4, 6>; +defm : SKXWriteResPair<WriteDPPSY,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; +defm : SKXWriteResPair<WriteDPPSZ,[SKXPort5,SKXPort015], 13, [1,3], 4, 7>; defm : SKXWriteResPair<WriteFSign, [SKXPort0], 1>; // Floating point fabs/fchs. -defm : SKXWriteResPair<WriteFRnd, [SKXPort015], 8, [2], 2, 6>; // Floating point rounding. -defm : SKXWriteResPair<WriteFRndY, [SKXPort015], 8, [2], 2, 7>; // Floating point rounding (YMM/ZMM). +defm : SKXWriteResPair<WriteFRnd, [SKXPort01], 8, [2], 2, 6>; // Floating point rounding. +defm : SKXWriteResPair<WriteFRndY, [SKXPort01], 8, [2], 2, 7>; +defm : SKXWriteResPair<WriteFRndZ, [SKXPort05], 8, [2], 2, 7>; defm : SKXWriteResPair<WriteFLogic, [SKXPort015], 1, [1], 1, 6>; // Floating point and/or/xor logicals. -defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; // Floating point and/or/xor logicals (YMM/ZMM). +defm : SKXWriteResPair<WriteFLogicY, [SKXPort015], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteFLogicZ, [SKXPort05], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteFTest, [SKXPort0], 2, [1], 1, 6>; // Floating point TEST instructions. -defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>; // Floating point TEST instructions (YMM/ZMM). +defm : SKXWriteResPair<WriteFTestY, [SKXPort0], 2, [1], 1, 7>; +defm : SKXWriteResPair<WriteFTestZ, [SKXPort0], 2, [1], 1, 7>; defm : SKXWriteResPair<WriteFShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector shuffles. -defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector shuffles (YMM/ZMM). +defm : SKXWriteResPair<WriteFShuffleY, [SKXPort5], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteFShuffleZ, [SKXPort5], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteFVarShuffle, [SKXPort5], 1, [1], 1, 6>; // Floating point vector variable shuffles. -defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Floating point vector variable shuffles. 
+defm : SKXWriteResPair<WriteFVarShuffleY, [SKXPort5], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteFVarShuffleZ, [SKXPort5], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteFBlend, [SKXPort015], 1, [1], 1, 6>; // Floating point vector blends. -defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; // Floating point vector blends. +defm : SKXWriteResPair<WriteFBlendY,[SKXPort015], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteFBlendZ,[SKXPort015], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteFVarBlend, [SKXPort015], 2, [2], 2, 6>; // Fp vector variable blends. -defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; // Fp vector variable blends. +defm : SKXWriteResPair<WriteFVarBlendY,[SKXPort015], 2, [2], 2, 7>; +defm : SKXWriteResPair<WriteFVarBlendZ,[SKXPort015], 2, [2], 2, 7>; // FMA Scheduling helper class. // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } @@ -279,47 +296,62 @@ defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>; defm : X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>; defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. -defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM). -defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; // Vector integer ALU op, no logicals (YMM/ZMM). +defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; +defm : SKXWriteResPair<WriteVecALUY, [SKXPort01], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteVecALUZ, [SKXPort0], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteVecLogic, [SKXPort05], 1, [1], 1, 5>; // Vector integer and/or/xor. -defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; // Vector integer and/or/xor (XMM). -defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; // Vector integer and/or/xor (YMM/ZMM). 
+defm : SKXWriteResPair<WriteVecLogicX,[SKXPort015], 1, [1], 1, 6>; +defm : SKXWriteResPair<WriteVecLogicY,[SKXPort015], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteVecLogicZ,[SKXPort05], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteVecTest, [SKXPort0,SKXPort5], 3, [1,1], 2, 6>; // Vector integer TEST instructions. -defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; // Vector integer TEST instructions (YMM/ZMM). +defm : SKXWriteResPair<WriteVecTestY, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; +defm : SKXWriteResPair<WriteVecTestZ, [SKXPort0,SKXPort5], 3, [1,1], 2, 7>; defm : SKXWriteResPair<WriteVecIMul, [SKXPort0], 4, [1], 1, 5>; // Vector integer multiply. -defm : SKXWriteResPair<WriteVecIMulX, [SKXPort015], 4, [1], 1, 6>; // Vector integer multiply (XMM). -defm : SKXWriteResPair<WriteVecIMulY, [SKXPort015], 4, [1], 1, 7>; // Vector integer multiply (YMM/ZMM). -defm : SKXWriteResPair<WritePMULLD, [SKXPort015], 10, [2], 2, 6>; // Vector PMULLD. -defm : SKXWriteResPair<WritePMULLDY, [SKXPort015], 10, [2], 2, 7>; // Vector PMULLD (YMM/ZMM). +defm : SKXWriteResPair<WriteVecIMulX, [SKXPort01], 4, [1], 1, 6>; +defm : SKXWriteResPair<WriteVecIMulY, [SKXPort01], 4, [1], 1, 7>; +defm : SKXWriteResPair<WriteVecIMulZ, [SKXPort05], 4, [1], 1, 7>; +defm : SKXWriteResPair<WritePMULLD, [SKXPort01], 10, [2], 2, 6>; // Vector PMULLD. +defm : SKXWriteResPair<WritePMULLDY, [SKXPort01], 10, [2], 2, 7>; +defm : SKXWriteResPair<WritePMULLDZ, [SKXPort05], 10, [2], 2, 7>; defm : SKXWriteResPair<WriteShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector shuffles. -defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector shuffles (XMM). -defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector shuffles (YMM/ZMM). 
+defm : SKXWriteResPair<WriteShuffleX, [SKXPort5], 1, [1], 1, 6>; +defm : SKXWriteResPair<WriteShuffleY, [SKXPort5], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteShuffleZ, [SKXPort5], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteVarShuffle, [SKXPort5], 1, [1], 1, 5>; // Vector variable shuffles. -defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; // Vector variable shuffles (XMM). -defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; // Vector variable shuffles (YMM/ZMM). +defm : SKXWriteResPair<WriteVarShuffleX, [SKXPort5], 1, [1], 1, 6>; +defm : SKXWriteResPair<WriteVarShuffleY, [SKXPort5], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteVarShuffleZ, [SKXPort5], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteBlend, [SKXPort5], 1, [1], 1, 6>; // Vector blends. -defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; // Vector blends (YMM/ZMM). +defm : SKXWriteResPair<WriteBlendY,[SKXPort5], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteBlendZ,[SKXPort5], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteVarBlend, [SKXPort015], 2, [2], 2, 6>; // Vector variable blends. -defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; // Vector variable blends (YMM/ZMM). +defm : SKXWriteResPair<WriteVarBlendY,[SKXPort015], 2, [2], 2, 6>; +defm : SKXWriteResPair<WriteVarBlendZ,[SKXPort05], 2, [1], 1, 6>; defm : SKXWriteResPair<WriteMPSAD, [SKXPort5], 4, [2], 2, 6>; // Vector MPSAD. -defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; // Vector MPSAD. +defm : SKXWriteResPair<WriteMPSADY, [SKXPort5], 4, [2], 2, 7>; +defm : SKXWriteResPair<WriteMPSADZ, [SKXPort5], 4, [2], 2, 7>; defm : SKXWriteResPair<WritePSADBW, [SKXPort5], 3, [1], 1, 5>; // Vector PSADBW. -defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; // Vector PSADBW. -defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; // Vector PSADBW. -defm : SKXWriteResPair<WritePHMINPOS, [SKXPort015], 4, [1], 1, 6>; // Vector PHMINPOS. 
+defm : SKXWriteResPair<WritePSADBWX, [SKXPort5], 3, [1], 1, 6>; +defm : SKXWriteResPair<WritePSADBWY, [SKXPort5], 3, [1], 1, 7>; +defm : SKXWriteResPair<WritePSADBWZ, [SKXPort5], 3, [1], 1, 7>; +defm : SKXWriteResPair<WritePHMINPOS, [SKXPort0], 4, [1], 1, 6>; // Vector PHMINPOS. // Vector integer shifts. defm : SKXWriteResPair<WriteVecShift, [SKXPort0], 1, [1], 1, 5>; defm : X86WriteRes<WriteVecShiftX, [SKXPort5,SKXPort01], 2, [1,1], 2>; defm : X86WriteRes<WriteVecShiftY, [SKXPort5,SKXPort01], 4, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftZ, [SKXPort5,SKXPort0], 4, [1,1], 2>; defm : X86WriteRes<WriteVecShiftXLd, [SKXPort01,SKXPort23], 7, [1,1], 2>; defm : X86WriteRes<WriteVecShiftYLd, [SKXPort01,SKXPort23], 8, [1,1], 2>; +defm : X86WriteRes<WriteVecShiftZLd, [SKXPort0,SKXPort23], 8, [1,1], 2>; defm : SKXWriteResPair<WriteVecShiftImm, [SKXPort0], 1, [1], 1, 5>; -defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts (XMM). -defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; // Vector integer immediate shifts (YMM/ZMM). +defm : SKXWriteResPair<WriteVecShiftImmX, [SKXPort01], 1, [1], 1, 6>; // Vector integer immediate shifts. +defm : SKXWriteResPair<WriteVecShiftImmY, [SKXPort01], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteVecShiftImmZ, [SKXPort0], 1, [1], 1, 7>; defm : SKXWriteResPair<WriteVarVecShift, [SKXPort01], 1, [1], 1, 6>; // Variable vector shifts. -defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; // Variable vector shifts (YMM/ZMM). +defm : SKXWriteResPair<WriteVarVecShiftY, [SKXPort01], 1, [1], 1, 7>; +defm : SKXWriteResPair<WriteVarVecShiftZ, [SKXPort0], 1, [1], 1, 7>; // Vector insert/extract operations. def : WriteRes<WriteVecInsert, [SKXPort5]> { @@ -343,36 +375,46 @@ def : WriteRes<WriteVecExtractSt, [SKXPort4,SKXPort5,SKXPort237]> { } // Conversion between integer and float. 
-defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort0,SKXPort015], 6, [1,1], 2>; -defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort1], 3>; -defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort1], 3>; -defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort0,SKXPort015], 6, [1,1], 2>; -defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort1], 3>; -defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteCvtSS2I, [SKXPort01], 6, [2], 2>; // Needs more work: DD vs DQ. +defm : SKXWriteResPair<WriteCvtPS2I, [SKXPort01], 3>; +defm : SKXWriteResPair<WriteCvtPS2IY, [SKXPort01], 3>; +defm : SKXWriteResPair<WriteCvtPS2IZ, [SKXPort05], 3>; +defm : SKXWriteResPair<WriteCvtSD2I, [SKXPort01], 6, [2], 2>; +defm : SKXWriteResPair<WriteCvtPD2I, [SKXPort01], 3>; +defm : SKXWriteResPair<WriteCvtPD2IY, [SKXPort01], 3>; +defm : SKXWriteResPair<WriteCvtPD2IZ, [SKXPort05], 3>; defm : SKXWriteResPair<WriteCvtI2SS, [SKXPort1], 4>; -defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort1], 4>; -defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort1], 4>; +defm : SKXWriteResPair<WriteCvtI2PS, [SKXPort01], 4>; +defm : SKXWriteResPair<WriteCvtI2PSY, [SKXPort01], 4>; +defm : SKXWriteResPair<WriteCvtI2PSZ, [SKXPort05], 4>; // Needs more work: DD vs DQ. 
defm : SKXWriteResPair<WriteCvtI2SD, [SKXPort1], 4>; -defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort0,SKXPort5], 5, [1,1], 2>; -defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort1], 4>; +defm : SKXWriteResPair<WriteCvtI2PD, [SKXPort01], 4>; +defm : SKXWriteResPair<WriteCvtI2PDY, [SKXPort01], 4>; +defm : SKXWriteResPair<WriteCvtI2PDZ, [SKXPort05], 4>; defm : SKXWriteResPair<WriteCvtSS2SD, [SKXPort1], 3>; defm : SKXWriteResPair<WriteCvtPS2PD, [SKXPort1], 3>; -defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort1], 3>; +defm : SKXWriteResPair<WriteCvtPS2PDY, [SKXPort5,SKXPort01], 3, [1,1], 2>; +defm : SKXWriteResPair<WriteCvtPS2PDZ, [SKXPort05], 3, [2], 2>; defm : SKXWriteResPair<WriteCvtSD2SS, [SKXPort1], 3>; defm : SKXWriteResPair<WriteCvtPD2PS, [SKXPort1], 3>; -defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort1], 3>; - -defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort015], 5, [1,1], 2>; -defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort015], 7, [1,1], 2>; -defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort015], 9, [1,1], 2>; -defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort015], 10, [1,1], 2>; - -defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort015], 5, [1,1], 2>; -defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort015], 7, [1,1], 2>; -defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 6, [1,1,1,1], 4>; -defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort015], 8, [1,1,1,1], 4>; +defm : SKXWriteResPair<WriteCvtPD2PSY, [SKXPort5,SKXPort01], 3, [1,1], 2>; +defm : SKXWriteResPair<WriteCvtPD2PSZ, [SKXPort05], 3, [2], 2>; + +defm : X86WriteRes<WriteCvtPH2PS, [SKXPort5,SKXPort01], 5, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSY, [SKXPort5,SKXPort01], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSZ, [SKXPort5,SKXPort0], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSLd, [SKXPort23,SKXPort01], 9, [1,1], 2>; +defm : X86WriteRes<WriteCvtPH2PSYLd, [SKXPort23,SKXPort01], 10, [1,1], 2>; +defm : 
X86WriteRes<WriteCvtPH2PSZLd, [SKXPort23,SKXPort05], 10, [1,1], 2>; + +defm : X86WriteRes<WriteCvtPS2PH, [SKXPort5,SKXPort01], 5, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PHY, [SKXPort5,SKXPort01], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PHZ, [SKXPort5,SKXPort05], 7, [1,1], 2>; +defm : X86WriteRes<WriteCvtPS2PHSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 6, [1,1,1,1], 4>; +defm : X86WriteRes<WriteCvtPS2PHYSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort01], 8, [1,1,1,1], 4>; +defm : X86WriteRes<WriteCvtPS2PHZSt, [SKXPort4,SKXPort5,SKXPort237,SKXPort05], 8, [1,1,1,1], 4>; // Strings instructions. @@ -589,15 +631,15 @@ def SKXWriteResGroup9 : SchedWriteRes<[SKXPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z|Z128|Z256)rr", - "VBLENDMPS(Z|Z128|Z256)rr", +def: InstRW<[SKXWriteResGroup9], (instregex "VBLENDMPD(Z128|Z256)rr", + "VBLENDMPS(Z128|Z256)rr", "VPADD(B|D|Q|W)(Y|Z|Z128|Z256)rr", "(V?)PADD(B|D|Q|W)rr", "VPBLENDD(Y?)rri", - "VPBLENDMB(Z|Z128|Z256)rr", - "VPBLENDMD(Z|Z128|Z256)rr", - "VPBLENDMQ(Z|Z128|Z256)rr", - "VPBLENDMW(Z|Z128|Z256)rr", + "VPBLENDMB(Z128|Z256)rr", + "VPBLENDMD(Z128|Z256)rr", + "VPBLENDMQ(Z128|Z256)rr", + "VPBLENDMW(Z128|Z256)rr", "VPSUB(B|D|Q|W)(Y|Z|Z128|Z256)rr", "(V?)PSUB(B|D|Q|W)rr", "VPTERNLOGD(Z|Z128|Z256)rri", @@ -611,8 +653,7 @@ def SKXWriteResGroup10 : SchedWriteRes<[SKXPort0156]> { def: InstRW<[SKXWriteResGroup10], (instrs CBW, CWDE, CDQE, CMC, STC)>; def: InstRW<[SKXWriteResGroup10], (instrs LAHF, SAHF)>; // TODO: This doesn't match Agner's data -def: InstRW<[SKXWriteResGroup10], (instregex "NOOP", - "SGDT64m", +def: InstRW<[SKXWriteResGroup10], (instregex "SGDT64m", "SIDT64m", "SMSW16m", "STRm", @@ -739,8 +780,7 @@ def SKXWriteResGroup30 : SchedWriteRes<[SKXPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup30], (instregex "KADD(B|D|Q|W)rr", - "KMOV(B|D|Q|W)rk", +def: InstRW<[SKXWriteResGroup30], (instregex "KMOV(B|D|Q|W)rk", 
"KORTEST(B|D|Q|W)rr", "KTEST(B|D|Q|W)rr")>; @@ -768,6 +808,7 @@ def SKXWriteResGroup32 : SchedWriteRes<[SKXPort5]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup32], (instregex "(ADD|SUB|SUBR)_(FPrST0|FST0r|FrST0)", + "KADD(B|D|Q|W)rr", "KSHIFTL(B|D|Q|W)ri", "KSHIFTR(B|D|Q|W)ri", "KUNPCKBWrr", @@ -907,26 +948,44 @@ def SKXWriteResGroup49 : SchedWriteRes<[SKXPort0]> { } def: InstRW<[SKXWriteResGroup49], (instregex "MUL_(FPrST0|FST0r|FrST0)")>; -def SKXWriteResGroup50 : SchedWriteRes<[SKXPort015]> { +def SKXWriteResGroup50 : SchedWriteRes<[SKXPort01]> { let Latency = 4; let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z|Z128|Z256)rr", +def: InstRW<[SKXWriteResGroup50], (instregex "VCVTDQ2PS(Y|Z128|Z256)rr", "(V?)CVTDQ2PSrr", - "VCVTPD2QQ(Z|Z128|Z256)rr", - "VCVTPD2UQQ(Z|Z128|Z256)rr", - "VCVTPS2DQ(Y|Z|Z128|Z256)rr", + "VCVTPD2QQ(Z128|Z256)rr", + "VCVTPD2UQQ(Z128|Z256)rr", + "VCVTPS2DQ(Y|Z128|Z256)rr", "(V?)CVTPS2DQrr", - "VCVTPS2UDQ(Z|Z128|Z256)rr", - "VCVTQQ2PD(Z|Z128|Z256)rr", - "VCVTTPD2QQ(Z|Z128|Z256)rr", - "VCVTTPD2UQQ(Z|Z128|Z256)rr", - "VCVTTPS2DQ(Y|Z|Z128|Z256)rr", + "VCVTPS2UDQ(Z128|Z256)rr", + "VCVTQQ2PD(Z128|Z256)rr", + "VCVTTPD2QQ(Z128|Z256)rr", + "VCVTTPD2UQQ(Z128|Z256)rr", + "VCVTTPS2DQ(Z128|Z256)rr", "(V?)CVTTPS2DQrr", - "VCVTTPS2UDQ(Z|Z128|Z256)rr", - "VCVTUDQ2PS(Z|Z128|Z256)rr", - "VCVTUQQ2PD(Z|Z128|Z256)rr")>; + "VCVTTPS2UDQ(Z128|Z256)rr", + "VCVTUDQ2PS(Z128|Z256)rr", + "VCVTUQQ2PD(Z128|Z256)rr")>; + +def SKXWriteResGroup50z : SchedWriteRes<[SKXPort05]> { + let Latency = 4; + let NumMicroOps = 1; + let ResourceCycles = [1]; +} +def: InstRW<[SKXWriteResGroup50z], (instrs VCVTDQ2PSZrr, + VCVTPD2QQZrr, + VCVTPD2UQQZrr, + VCVTPS2DQZrr, + VCVTPS2UDQZrr, + VCVTQQ2PDZrr, + VCVTTPD2QQZrr, + VCVTTPD2UQQZrr, + VCVTTPS2DQZrr, + VCVTTPS2UDQZrr, + VCVTUDQ2PSZrr, + VCVTUQQ2PDZrr)>; def SKXWriteResGroup51 : SchedWriteRes<[SKXPort5]> { let Latency = 4; @@ -1010,13 +1069,6 @@ def: 
InstRW<[SKXWriteResGroup58], (instregex "MOVSX(16|32|64)rm16", "MOVZX(16|32|64)rm8", "(V?)MOVDDUPrm")>; // TODO: Should this be SKXWriteResGroup71? -def SKXWriteResGroup59 : SchedWriteRes<[SKXPort015]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SKXWriteResGroup59], (instregex "VCVTSD2SSZrr")>; - def SKXWriteResGroup61 : SchedWriteRes<[SKXPort5,SKXPort015]> { let Latency = 5; let NumMicroOps = 2; @@ -1035,7 +1087,7 @@ def: InstRW<[SKXWriteResGroup61], (instregex "MMX_CVT(T?)PD2PIirr", "VCVTPS2QQZ128rr", "VCVTPS2UQQZ128rr", "VCVTQQ2PSZ128rr", - "(V?)CVTSD2SSrr", + "(V?)CVTSD2SS(Z?)rr", "(V?)CVTSI(64)?2SDrr", "VCVTSI2SSZrr", "(V?)CVTSI2SSrr", @@ -1136,7 +1188,7 @@ def: InstRW<[SKXWriteResGroup71], (instregex "VBROADCASTSSrm", "VPBROADCASTDrm", "VPBROADCASTQrm")>; -def SKXWriteResGroup72 : SchedWriteRes<[SKXPort0]> { +def SKXWriteResGroup72 : SchedWriteRes<[SKXPort5]> { let Latency = 6; let NumMicroOps = 2; let ResourceCycles = [2]; @@ -1286,7 +1338,7 @@ def: InstRW<[SKXWriteResGroup89], (instregex "LD_F(32|64|80)m", "VPBROADCASTDYrm", "VPBROADCASTQYrm")>; -def SKXWriteResGroup90 : SchedWriteRes<[SKXPort0,SKXPort5]> { +def SKXWriteResGroup90 : SchedWriteRes<[SKXPort01,SKXPort5]> { let Latency = 7; let NumMicroOps = 2; let ResourceCycles = [1,1]; @@ -1318,21 +1370,40 @@ def SKXWriteResGroup93 : SchedWriteRes<[SKXPort5,SKXPort015]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PD(Z|Z256)rr", - "VCVTPD2DQ(Y|Z|Z256)rr", - "VCVTPD2PS(Y|Z|Z256)rr", - "VCVTPD2UDQ(Z|Z256)rr", - "VCVTPS2PD(Y|Z|Z256)rr", - "VCVTPS2QQ(Z|Z256)rr", - "VCVTPS2UQQ(Z|Z256)rr", - "VCVTQQ2PS(Z|Z256)rr", - "VCVTTPD2DQ(Y|Z|Z256)rr", - "VCVTTPD2UDQ(Z|Z256)rr", - "VCVTTPS2QQ(Z|Z256)rr", - "VCVTTPS2UQQ(Z|Z256)rr", - "VCVTUDQ2PD(Z|Z256)rr", - "VCVTUQQ2PS(Z|Z256)rr")>; - +def: InstRW<[SKXWriteResGroup93], (instregex "VCVTDQ2PDZ256rr", + "VCVTPD2DQ(Y|Z256)rr", + "VCVTPD2PS(Y|Z256)rr", + "VCVTPD2UDQZ256rr", 
+ "VCVTPS2PD(Y|Z256)rr", + "VCVTPS2QQZ256rr", + "VCVTPS2UQQZ256rr", + "VCVTQQ2PSZ256rr", + "VCVTTPD2DQ(Y|Z256)rr", + "VCVTTPD2UDQZ256rr", + "VCVTTPS2QQZ256rr", + "VCVTTPS2UQQZ256rr", + "VCVTUDQ2PDZ256rr", + "VCVTUQQ2PSZ256rr")>; + +def SKXWriteResGroup93z : SchedWriteRes<[SKXPort5,SKXPort05]> { + let Latency = 7; + let NumMicroOps = 2; + let ResourceCycles = [1,1]; +} +def: InstRW<[SKXWriteResGroup93z], (instrs VCVTDQ2PDZrr, + VCVTPD2DQZrr, + VCVTPD2PSZrr, + VCVTPD2UDQZrr, + VCVTPS2PDZrr, + VCVTPS2QQZrr, + VCVTPS2UQQZrr, + VCVTQQ2PSZrr, + VCVTTPD2DQZrr, + VCVTTPD2UDQZrr, + VCVTTPS2QQZrr, + VCVTTPS2UQQZrr, + VCVTUDQ2PDZrr, + VCVTUQQ2PSZrr)>; def SKXWriteResGroup95 : SchedWriteRes<[SKXPort23,SKXPort015]> { let Latency = 7; @@ -1405,8 +1476,8 @@ def SKXWriteResGroup100 : SchedWriteRes<[SKXPort0,SKXPort5,SKXPort015]> { let ResourceCycles = [1,1,1]; } def: InstRW<[SKXWriteResGroup100], (instregex "VCVTSS2USI64Zrr", - "VCVTTSS2SI(64)?Zrr", - "(V?)CVTTSS2SI(64)?rr", + "(V?)CVTSS2SI64(Z?)rr", + "(V?)CVTTSS2SI64(Z?)rr", "VCVTTSS2USI64Zrr")>; def SKXWriteResGroup101 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort05]> { @@ -1751,16 +1822,6 @@ def SKXWriteResGroup137 : SchedWriteRes<[SKXPort23,SKXPort015]> { def: InstRW<[SKXWriteResGroup137], (instregex "MMX_CVT(T?)PS2PIirm", "(V?)CVTPS2PDrm")>; -def SKXWriteResGroup138 : SchedWriteRes<[SKXPort0,SKXPort015]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SKXWriteResGroup138], (instregex "VRCP14PDZr(b?)", - "VRCP14PSZr(b?)", - "VRSQRT14PDZr(b?)", - "VRSQRT14PSZr(b?)")>; - def SKXWriteResGroup142 : SchedWriteRes<[SKXPort1,SKXPort5,SKXPort23]> { let Latency = 9; let NumMicroOps = 3; @@ -2009,12 +2070,19 @@ def SKXWriteResGroup171 : SchedWriteRes<[SKXPort06,SKXPort0156]> { } def: InstRW<[SKXWriteResGroup171], (instrs LOOPE, LOOPNE)>; -def SKXWriteResGroup174 : SchedWriteRes<[SKXPort015]> { +def SKXWriteResGroup174 : SchedWriteRes<[SKXPort01]> { + let Latency = 12; + let NumMicroOps = 3; + 
let ResourceCycles = [3]; +} +def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z128|Z256)rr")>; + +def SKXWriteResGroup174z : SchedWriteRes<[SKXPort05]> { let Latency = 12; let NumMicroOps = 3; let ResourceCycles = [3]; } -def: InstRW<[SKXWriteResGroup174], (instregex "VPMULLQ(Z|Z128|Z256)rr")>; +def: InstRW<[SKXWriteResGroup174z], (instregex "VPMULLQZrr")>; def SKXWriteResGroup175 : SchedWriteRes<[SKXPort5,SKXPort23]> { let Latency = 12; @@ -2143,16 +2211,6 @@ def SKXWriteResGroup195 : SchedWriteRes<[SKXPort1,SKXPort23,SKXPort237,SKXPort06 } def: InstRW<[SKXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>; -def SKXWriteResGroup198 : SchedWriteRes<[SKXPort0,SKXPort23,SKXPort015]> { - let Latency = 16; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SKXWriteResGroup198], (instregex "VRCP14PDZm(b?)", - "VRCP14PSZm(b?)", - "VRSQRT14PDZm(b?)", - "VRSQRT14PSZm(b?)")>; - def SKXWriteResGroup199 : SchedWriteRes<[SKXPort4,SKXPort23,SKXPort237,SKXPort06,SKXPort15,SKXPort0156]> { let Latency = 16; let NumMicroOps = 14; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index 8aad2ad7009..be3ff86e0b4 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -62,7 +62,6 @@ multiclass X86WriteResPairUnsupported<X86FoldableSchedWrite SchedRW> { } } - // Multiclass that wraps X86FoldableSchedWrite for each vector width. class X86SchedWriteWidths<X86FoldableSchedWrite sScl, X86FoldableSchedWrite s128, @@ -177,23 +176,29 @@ def WriteFMoveY : SchedWrite; defm WriteFAdd : X86SchedWritePair; // Floating point add/sub. defm WriteFAddX : X86SchedWritePair; // Floating point add/sub (XMM). -defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM/ZMM). +defm WriteFAddY : X86SchedWritePair; // Floating point add/sub (YMM). +defm WriteFAddZ : X86SchedWritePair; // Floating point add/sub (ZMM). defm WriteFAdd64 : X86SchedWritePair; // Floating point double add/sub. 
defm WriteFAdd64X : X86SchedWritePair; // Floating point double add/sub (XMM). -defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM/ZMM). +defm WriteFAdd64Y : X86SchedWritePair; // Floating point double add/sub (YMM). +defm WriteFAdd64Z : X86SchedWritePair; // Floating point double add/sub (ZMM). defm WriteFCmp : X86SchedWritePair; // Floating point compare. defm WriteFCmpX : X86SchedWritePair; // Floating point compare (XMM). -defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM/ZMM). +defm WriteFCmpY : X86SchedWritePair; // Floating point compare (YMM). +defm WriteFCmpZ : X86SchedWritePair; // Floating point compare (ZMM). defm WriteFCmp64 : X86SchedWritePair; // Floating point double compare. defm WriteFCmp64X : X86SchedWritePair; // Floating point double compare (XMM). -defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM/ZMM). +defm WriteFCmp64Y : X86SchedWritePair; // Floating point double compare (YMM). +defm WriteFCmp64Z : X86SchedWritePair; // Floating point double compare (ZMM). defm WriteFCom : X86SchedWritePair; // Floating point compare to flags. defm WriteFMul : X86SchedWritePair; // Floating point multiplication. defm WriteFMulX : X86SchedWritePair; // Floating point multiplication (XMM). -defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM/ZMM). +defm WriteFMulY : X86SchedWritePair; // Floating point multiplication (YMM). +defm WriteFMulZ : X86SchedWritePair; // Floating point multiplication (ZMM). defm WriteFMul64 : X86SchedWritePair; // Floating point double multiplication. defm WriteFMul64X : X86SchedWritePair; // Floating point double multiplication (XMM). -defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM/ZMM). +defm WriteFMul64Y : X86SchedWritePair; // Floating point double multiplication (YMM). +defm WriteFMul64Z : X86SchedWritePair; // Floating point double multiplication (ZMM).
defm WriteFDiv : X86SchedWritePair; // Floating point division. defm WriteFDivX : X86SchedWritePair; // Floating point division (XMM). defm WriteFDivY : X86SchedWritePair; // Floating point division (YMM). @@ -213,41 +218,54 @@ defm WriteFSqrt64Z : X86SchedWritePair; // Floating point double square root (ZM defm WriteFSqrt80 : X86SchedWritePair; // Floating point long double square root. defm WriteFRcp : X86SchedWritePair; // Floating point reciprocal estimate. defm WriteFRcpX : X86SchedWritePair; // Floating point reciprocal estimate (XMM). -defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM/ZMM). +defm WriteFRcpY : X86SchedWritePair; // Floating point reciprocal estimate (YMM). +defm WriteFRcpZ : X86SchedWritePair; // Floating point reciprocal estimate (ZMM). defm WriteFRsqrt : X86SchedWritePair; // Floating point reciprocal square root estimate. defm WriteFRsqrtX: X86SchedWritePair; // Floating point reciprocal square root estimate (XMM). -defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM/ZMM). +defm WriteFRsqrtY: X86SchedWritePair; // Floating point reciprocal square root estimate (YMM). +defm WriteFRsqrtZ: X86SchedWritePair; // Floating point reciprocal square root estimate (ZMM). defm WriteFMA : X86SchedWritePair; // Fused Multiply Add. defm WriteFMAX : X86SchedWritePair; // Fused Multiply Add (XMM). -defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM/ZMM). +defm WriteFMAY : X86SchedWritePair; // Fused Multiply Add (YMM). +defm WriteFMAZ : X86SchedWritePair; // Fused Multiply Add (ZMM). defm WriteDPPD : X86SchedWritePair; // Floating point double dot product. defm WriteDPPS : X86SchedWritePair; // Floating point single dot product. defm WriteDPPSY : X86SchedWritePair; // Floating point single dot product (YMM). +defm WriteDPPSZ : X86SchedWritePair; // Floating point single dot product (ZMM). defm WriteFSign : X86SchedWritePair; // Floating point fabs/fchs. 
defm WriteFRnd : X86SchedWritePair; // Floating point rounding. -defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM/ZMM). +defm WriteFRndY : X86SchedWritePair; // Floating point rounding (YMM). +defm WriteFRndZ : X86SchedWritePair; // Floating point rounding (ZMM). defm WriteFLogic : X86SchedWritePair; // Floating point and/or/xor logicals. -defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM/ZMM). +defm WriteFLogicY : X86SchedWritePair; // Floating point and/or/xor logicals (YMM). +defm WriteFLogicZ : X86SchedWritePair; // Floating point and/or/xor logicals (ZMM). defm WriteFTest : X86SchedWritePair; // Floating point TEST instructions. -defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM/ZMM). +defm WriteFTestY : X86SchedWritePair; // Floating point TEST instructions (YMM). +defm WriteFTestZ : X86SchedWritePair; // Floating point TEST instructions (ZMM). defm WriteFShuffle : X86SchedWritePair; // Floating point vector shuffles. -defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM/ZMM). +defm WriteFShuffleY : X86SchedWritePair; // Floating point vector shuffles (YMM). +defm WriteFShuffleZ : X86SchedWritePair; // Floating point vector shuffles (ZMM). defm WriteFVarShuffle : X86SchedWritePair; // Floating point vector variable shuffles. -defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM/ZMM). +defm WriteFVarShuffleY : X86SchedWritePair; // Floating point vector variable shuffles (YMM). +defm WriteFVarShuffleZ : X86SchedWritePair; // Floating point vector variable shuffles (ZMM). defm WriteFBlend : X86SchedWritePair; // Floating point vector blends. -defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM/ZMM). +defm WriteFBlendY : X86SchedWritePair; // Floating point vector blends (YMM). +defm WriteFBlendZ : X86SchedWritePair; // Floating point vector blends (ZMM). 
defm WriteFVarBlend : X86SchedWritePair; // Fp vector variable blends. -defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM/ZMM). +defm WriteFVarBlendY : X86SchedWritePair; // Fp vector variable blends (YMM). +defm WriteFVarBlendZ : X86SchedWritePair; // Fp vector variable blends (ZMM). // FMA Scheduling helper class. class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; } // Horizontal Add/Sub (float and integer) defm WriteFHAdd : X86SchedWritePair; -defm WriteFHAddY : X86SchedWritePair; // YMM/ZMM. +defm WriteFHAddY : X86SchedWritePair; +defm WriteFHAddZ : X86SchedWritePair; defm WritePHAdd : X86SchedWritePair; -defm WritePHAddX : X86SchedWritePair; // XMM. -defm WritePHAddY : X86SchedWritePair; // YMM/ZMM. +defm WritePHAddX : X86SchedWritePair; +defm WritePHAddY : X86SchedWritePair; +defm WritePHAddZ : X86SchedWritePair; // Vector integer operations. def WriteVecLoad : SchedWrite; @@ -272,38 +290,51 @@ def WriteVecMoveFromGpr : SchedWrite; defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). -defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM/ZMM). +defm WriteVecALUY : X86SchedWritePair; // Vector integer ALU op, no logicals (YMM). +defm WriteVecALUZ : X86SchedWritePair; // Vector integer ALU op, no logicals (ZMM). defm WriteVecLogic : X86SchedWritePair; // Vector integer and/or/xor logicals. defm WriteVecLogicX : X86SchedWritePair; // Vector integer and/or/xor logicals (XMM). -defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM/ZMM). +defm WriteVecLogicY : X86SchedWritePair; // Vector integer and/or/xor logicals (YMM). +defm WriteVecLogicZ : X86SchedWritePair; // Vector integer and/or/xor logicals (ZMM). defm WriteVecTest : X86SchedWritePair; // Vector integer TEST instructions. -defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM/ZMM).
+defm WriteVecTestY : X86SchedWritePair; // Vector integer TEST instructions (YMM). +defm WriteVecTestZ : X86SchedWritePair; // Vector integer TEST instructions (ZMM). defm WriteVecShift : X86SchedWritePair; // Vector integer shifts (default). defm WriteVecShiftX : X86SchedWritePair; // Vector integer shifts (XMM). -defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM/ZMM). +defm WriteVecShiftY : X86SchedWritePair; // Vector integer shifts (YMM). +defm WriteVecShiftZ : X86SchedWritePair; // Vector integer shifts (ZMM). defm WriteVecShiftImm : X86SchedWritePair; // Vector integer immediate shifts (default). defm WriteVecShiftImmX: X86SchedWritePair; // Vector integer immediate shifts (XMM). -defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM/ZMM). +defm WriteVecShiftImmY: X86SchedWritePair; // Vector integer immediate shifts (YMM). +defm WriteVecShiftImmZ: X86SchedWritePair; // Vector integer immediate shifts (ZMM). defm WriteVecIMul : X86SchedWritePair; // Vector integer multiply (default). defm WriteVecIMulX : X86SchedWritePair; // Vector integer multiply (XMM). -defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM/ZMM). +defm WriteVecIMulY : X86SchedWritePair; // Vector integer multiply (YMM). +defm WriteVecIMulZ : X86SchedWritePair; // Vector integer multiply (ZMM). defm WritePMULLD : X86SchedWritePair; // Vector PMULLD. -defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM/ZMM). +defm WritePMULLDY : X86SchedWritePair; // Vector PMULLD (YMM). +defm WritePMULLDZ : X86SchedWritePair; // Vector PMULLD (ZMM). defm WriteShuffle : X86SchedWritePair; // Vector shuffles. defm WriteShuffleX : X86SchedWritePair; // Vector shuffles (XMM). -defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM/ZMM). +defm WriteShuffleY : X86SchedWritePair; // Vector shuffles (YMM). +defm WriteShuffleZ : X86SchedWritePair; // Vector shuffles (ZMM). 
defm WriteVarShuffle : X86SchedWritePair; // Vector variable shuffles. defm WriteVarShuffleX : X86SchedWritePair; // Vector variable shuffles (XMM). -defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM/ZMM). +defm WriteVarShuffleY : X86SchedWritePair; // Vector variable shuffles (YMM). +defm WriteVarShuffleZ : X86SchedWritePair; // Vector variable shuffles (ZMM). defm WriteBlend : X86SchedWritePair; // Vector blends. -defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM/ZMM). +defm WriteBlendY : X86SchedWritePair; // Vector blends (YMM). +defm WriteBlendZ : X86SchedWritePair; // Vector blends (ZMM). defm WriteVarBlend : X86SchedWritePair; // Vector variable blends. -defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM/ZMM). +defm WriteVarBlendY : X86SchedWritePair; // Vector variable blends (YMM). +defm WriteVarBlendZ : X86SchedWritePair; // Vector variable blends (ZMM). defm WritePSADBW : X86SchedWritePair; // Vector PSADBW. defm WritePSADBWX : X86SchedWritePair; // Vector PSADBW (XMM). -defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM/ZMM). +defm WritePSADBWY : X86SchedWritePair; // Vector PSADBW (YMM). +defm WritePSADBWZ : X86SchedWritePair; // Vector PSADBW (ZMM). defm WriteMPSAD : X86SchedWritePair; // Vector MPSAD. -defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM/ZMM). +defm WriteMPSADY : X86SchedWritePair; // Vector MPSAD (YMM). +defm WriteMPSADZ : X86SchedWritePair; // Vector MPSAD (ZMM). defm WritePHMINPOS : X86SchedWritePair; // Vector PHMINPOS. // Vector insert/extract operations. @@ -320,35 +351,44 @@ def WriteMMXMOVMSK : SchedWrite; // Conversion between integer and float. defm WriteCvtSD2I : X86SchedWritePair; // Double -> Integer. defm WriteCvtPD2I : X86SchedWritePair; // Double -> Integer (XMM). -defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM/ZMM). +defm WriteCvtPD2IY : X86SchedWritePair; // Double -> Integer (YMM). 
+defm WriteCvtPD2IZ : X86SchedWritePair; // Double -> Integer (ZMM). defm WriteCvtSS2I : X86SchedWritePair; // Float -> Integer. defm WriteCvtPS2I : X86SchedWritePair; // Float -> Integer (XMM). -defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM/ZMM). +defm WriteCvtPS2IY : X86SchedWritePair; // Float -> Integer (YMM). +defm WriteCvtPS2IZ : X86SchedWritePair; // Float -> Integer (ZMM). defm WriteCvtI2SD : X86SchedWritePair; // Integer -> Double. defm WriteCvtI2PD : X86SchedWritePair; // Integer -> Double (XMM). -defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM/ZMM). +defm WriteCvtI2PDY : X86SchedWritePair; // Integer -> Double (YMM). +defm WriteCvtI2PDZ : X86SchedWritePair; // Integer -> Double (ZMM). defm WriteCvtI2SS : X86SchedWritePair; // Integer -> Float. defm WriteCvtI2PS : X86SchedWritePair; // Integer -> Float (XMM). -defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM/ZMM). +defm WriteCvtI2PSY : X86SchedWritePair; // Integer -> Float (YMM). +defm WriteCvtI2PSZ : X86SchedWritePair; // Integer -> Float (ZMM). defm WriteCvtSS2SD : X86SchedWritePair; // Float -> Double size conversion. defm WriteCvtPS2PD : X86SchedWritePair; // Float -> Double size conversion (XMM). -defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM/ZMM). +defm WriteCvtPS2PDY : X86SchedWritePair; // Float -> Double size conversion (YMM). +defm WriteCvtPS2PDZ : X86SchedWritePair; // Float -> Double size conversion (ZMM). defm WriteCvtSD2SS : X86SchedWritePair; // Double -> Float size conversion. defm WriteCvtPD2PS : X86SchedWritePair; // Double -> Float size conversion (XMM). -defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM/ZMM). +defm WriteCvtPD2PSY : X86SchedWritePair; // Double -> Float size conversion (YMM). +defm WriteCvtPD2PSZ : X86SchedWritePair; // Double -> Float size conversion (ZMM). defm WriteCvtPH2PS : X86SchedWritePair; // Half -> Float size conversion. 
-defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM/ZMM). +defm WriteCvtPH2PSY : X86SchedWritePair; // Half -> Float size conversion (YMM). +defm WriteCvtPH2PSZ : X86SchedWritePair; // Half -> Float size conversion (ZMM). def WriteCvtPS2PH : SchedWrite; // // Float -> Half size conversion. -def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM/ZMM). +def WriteCvtPS2PHY : SchedWrite; // // Float -> Half size conversion (YMM). +def WriteCvtPS2PHZ : SchedWrite; // // Float -> Half size conversion (ZMM). def WriteCvtPS2PHSt : SchedWrite; // // Float -> Half + store size conversion. -def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM/ZMM). +def WriteCvtPS2PHYSt : SchedWrite; // // Float -> Half + store size conversion (YMM). +def WriteCvtPS2PHZSt : SchedWrite; // // Float -> Half + store size conversion (ZMM). // CRC32 instruction. defm WriteCRC32 : X86SchedWritePair; @@ -387,7 +427,8 @@ defm WriteFVarShuffle256 : X86SchedWritePair; // Fp 256-bit width variable shuff defm WriteShuffle256 : X86SchedWritePair; // 256-bit width vector shuffles. defm WriteVarShuffle256 : X86SchedWritePair; // 256-bit width vector variable shuffles. defm WriteVarVecShift : X86SchedWritePair; // Variable vector shifts. -defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM/ZMM). +defm WriteVarVecShiftY : X86SchedWritePair; // Variable vector shifts (YMM). +defm WriteVarVecShiftZ : X86SchedWritePair; // Variable vector shifts (ZMM). // Old microcoded instructions that nobody use. def WriteMicrocoded : SchedWrite; @@ -441,25 +482,25 @@ def SchedWriteVecMoveLSNT // Vector width wrappers. 
def SchedWriteFAdd - : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddY>; + : X86SchedWriteWidths<WriteFAdd, WriteFAddX, WriteFAddY, WriteFAddZ>; def SchedWriteFAdd64 - : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Y>; + : X86SchedWriteWidths<WriteFAdd64, WriteFAdd64X, WriteFAdd64Y, WriteFAdd64Z>; def SchedWriteFHAdd - : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddY>; + : X86SchedWriteWidths<WriteFHAdd, WriteFHAdd, WriteFHAddY, WriteFHAddZ>; def SchedWriteFCmp - : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpY>; + : X86SchedWriteWidths<WriteFCmp, WriteFCmpX, WriteFCmpY, WriteFCmpZ>; def SchedWriteFCmp64 - : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Y>; + : X86SchedWriteWidths<WriteFCmp64, WriteFCmp64X, WriteFCmp64Y, WriteFCmp64Z>; def SchedWriteFMul - : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulY>; + : X86SchedWriteWidths<WriteFMul, WriteFMulX, WriteFMulY, WriteFMulZ>; def SchedWriteFMul64 - : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Y>; + : X86SchedWriteWidths<WriteFMul64, WriteFMul64X, WriteFMul64Y, WriteFMul64Z>; def SchedWriteFMA - : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAY>; + : X86SchedWriteWidths<WriteFMA, WriteFMAX, WriteFMAY, WriteFMAZ>; def SchedWriteDPPD : X86SchedWriteWidths<WriteDPPD, WriteDPPD, WriteDPPD, WriteDPPD>; def SchedWriteDPPS - : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSY>; + : X86SchedWriteWidths<WriteDPPS, WriteDPPS, WriteDPPSY, WriteDPPSZ>; def SchedWriteFDiv : X86SchedWriteWidths<WriteFDiv, WriteFDivX, WriteFDivY, WriteFDivZ>; def SchedWriteFDiv64 @@ -471,90 +512,90 @@ def SchedWriteFSqrt64 : X86SchedWriteWidths<WriteFSqrt64, WriteFSqrt64X, WriteFSqrt64Y, WriteFSqrt64Z>; def SchedWriteFRcp - : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, WriteFRcpY>; + : X86SchedWriteWidths<WriteFRcp, WriteFRcpX, WriteFRcpY, 
WriteFRcpZ>; def SchedWriteFRsqrt - : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtY>; + : X86SchedWriteWidths<WriteFRsqrt, WriteFRsqrtX, WriteFRsqrtY, WriteFRsqrtZ>; def SchedWriteFRnd - : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndY>; + : X86SchedWriteWidths<WriteFRnd, WriteFRnd, WriteFRndY, WriteFRndZ>; def SchedWriteFLogic - : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicY>; + : X86SchedWriteWidths<WriteFLogic, WriteFLogic, WriteFLogicY, WriteFLogicZ>; def SchedWriteFTest - : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestY>; + : X86SchedWriteWidths<WriteFTest, WriteFTest, WriteFTestY, WriteFTestZ>; def SchedWriteFShuffle : X86SchedWriteWidths<WriteFShuffle, WriteFShuffle, - WriteFShuffleY, WriteFShuffleY>; + WriteFShuffleY, WriteFShuffleZ>; def SchedWriteFVarShuffle : X86SchedWriteWidths<WriteFVarShuffle, WriteFVarShuffle, - WriteFVarShuffleY, WriteFVarShuffleY>; + WriteFVarShuffleY, WriteFVarShuffleZ>; def SchedWriteFBlend - : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendY>; + : X86SchedWriteWidths<WriteFBlend, WriteFBlend, WriteFBlendY, WriteFBlendZ>; def SchedWriteFVarBlend : X86SchedWriteWidths<WriteFVarBlend, WriteFVarBlend, - WriteFVarBlendY, WriteFVarBlendY>; + WriteFVarBlendY, WriteFVarBlendZ>; def SchedWriteCvtDQ2PD : X86SchedWriteWidths<WriteCvtI2SD, WriteCvtI2PD, - WriteCvtI2PDY, WriteCvtI2PDY>; + WriteCvtI2PDY, WriteCvtI2PDZ>; def SchedWriteCvtDQ2PS : X86SchedWriteWidths<WriteCvtI2SS, WriteCvtI2PS, - WriteCvtI2PSY, WriteCvtI2PSY>; + WriteCvtI2PSY, WriteCvtI2PSZ>; def SchedWriteCvtPD2DQ : X86SchedWriteWidths<WriteCvtSD2I, WriteCvtPD2I, - WriteCvtPD2IY, WriteCvtPD2IY>; + WriteCvtPD2IY, WriteCvtPD2IZ>; def SchedWriteCvtPS2DQ : X86SchedWriteWidths<WriteCvtSS2I, WriteCvtPS2I, - WriteCvtPS2IY, WriteCvtPS2IY>; + WriteCvtPS2IY, WriteCvtPS2IZ>; def SchedWriteCvtPS2PD : X86SchedWriteWidths<WriteCvtSS2SD, WriteCvtPS2PD, - WriteCvtPS2PDY, 
WriteCvtPS2PDY>; + WriteCvtPS2PDY, WriteCvtPS2PDZ>; def SchedWriteCvtPD2PS : X86SchedWriteWidths<WriteCvtSD2SS, WriteCvtPD2PS, - WriteCvtPD2PSY, WriteCvtPD2PSY>; + WriteCvtPD2PSY, WriteCvtPD2PSZ>; def SchedWriteVecALU - : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUY>; + : X86SchedWriteWidths<WriteVecALU, WriteVecALUX, WriteVecALUY, WriteVecALUZ>; def SchedWritePHAdd - : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddY>; + : X86SchedWriteWidths<WritePHAdd, WritePHAddX, WritePHAddY, WritePHAddZ>; def SchedWriteVecLogic : X86SchedWriteWidths<WriteVecLogic, WriteVecLogicX, - WriteVecLogicY, WriteVecLogicY>; + WriteVecLogicY, WriteVecLogicZ>; def SchedWriteVecTest : X86SchedWriteWidths<WriteVecTest, WriteVecTest, - WriteVecTestY, WriteVecTestY>; + WriteVecTestY, WriteVecTestZ>; def SchedWriteVecShift : X86SchedWriteWidths<WriteVecShift, WriteVecShiftX, - WriteVecShiftY, WriteVecShiftY>; + WriteVecShiftY, WriteVecShiftZ>; def SchedWriteVecShiftImm : X86SchedWriteWidths<WriteVecShiftImm, WriteVecShiftImmX, - WriteVecShiftImmY, WriteVecShiftImmY>; + WriteVecShiftImmY, WriteVecShiftImmZ>; def SchedWriteVarVecShift : X86SchedWriteWidths<WriteVarVecShift, WriteVarVecShift, - WriteVarVecShiftY, WriteVarVecShiftY>; + WriteVarVecShiftY, WriteVarVecShiftZ>; def SchedWriteVecIMul : X86SchedWriteWidths<WriteVecIMul, WriteVecIMulX, - WriteVecIMulY, WriteVecIMulY>; + WriteVecIMulY, WriteVecIMulZ>; def SchedWritePMULLD : X86SchedWriteWidths<WritePMULLD, WritePMULLD, - WritePMULLDY, WritePMULLDY>; + WritePMULLDY, WritePMULLDZ>; def SchedWriteMPSAD : X86SchedWriteWidths<WriteMPSAD, WriteMPSAD, - WriteMPSADY, WriteMPSADY>; + WriteMPSADY, WriteMPSADZ>; def SchedWritePSADBW : X86SchedWriteWidths<WritePSADBW, WritePSADBWX, - WritePSADBWY, WritePSADBWY>; + WritePSADBWY, WritePSADBWZ>; def SchedWriteShuffle : X86SchedWriteWidths<WriteShuffle, WriteShuffleX, - WriteShuffleY, WriteShuffleY>; + WriteShuffleY, WriteShuffleZ>; def 
SchedWriteVarShuffle : X86SchedWriteWidths<WriteVarShuffle, WriteVarShuffleX, - WriteVarShuffleY, WriteVarShuffleY>; + WriteVarShuffleY, WriteVarShuffleZ>; def SchedWriteBlend - : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendY>; + : X86SchedWriteWidths<WriteBlend, WriteBlend, WriteBlendY, WriteBlendZ>; def SchedWriteVarBlend : X86SchedWriteWidths<WriteVarBlend, WriteVarBlend, - WriteVarBlendY, WriteVarBlendY>; + WriteVarBlendY, WriteVarBlendZ>; // Vector size wrappers. def SchedWriteFAddSizes diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index f2a4156b0bc..83ee19b7363 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -210,28 +210,36 @@ defm : X86WriteRes<WriteEMMS, [AtomPort01], 5, [5], 1>; defm : AtomWriteResPair<WriteFAdd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFAddX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFAddY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : AtomWriteResPair<WriteFAdd64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFAdd64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteFAdd64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : AtomWriteResPair<WriteFCmp, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFCmpX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFCmpY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : AtomWriteResPair<WriteFCmp64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFCmp64X, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteFCmp64Y, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : 
AtomWriteResPair<WriteFCom, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteFMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFMulY, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : AtomWriteResPair<WriteFMul64, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFMul64X, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; defm : AtomWriteResPair<WriteFMul64Y, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : AtomWriteResPair<WriteFRcp, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteFRcpX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; defm : AtomWriteResPair<WriteFRcpY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; defm : AtomWriteResPair<WriteFRsqrt, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteFRsqrtX, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; defm : AtomWriteResPair<WriteFRsqrtY, [AtomPort01], [AtomPort01], 9, 10, [9], [10]>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : AtomWriteResPair<WriteFDiv, [AtomPort01], [AtomPort01], 34, 34, [34], [34]>; defm : AtomWriteResPair<WriteFDivX, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; defm : AtomWriteResPair<WriteFDivY, [AtomPort01], [AtomPort01], 70, 70, [70], [70]>; @@ -252,24 +260,33 @@ defm : AtomWriteResPair<WriteFSqrt80, [AtomPort01], [AtomPort01], 71, 71, defm : AtomWriteResPair<WriteFSign, [AtomPort1], [AtomPort1]>; defm : AtomWriteResPair<WriteFRnd, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteFRndY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : AtomWriteResPair<WriteFLogic, [AtomPort01], [AtomPort0]>; defm : X86WriteResPairUnsupported<WriteFLogicY>; +defm : 
X86WriteResPairUnsupported<WriteFLogicZ>; defm : AtomWriteResPair<WriteFTest, [AtomPort01], [AtomPort0]>; defm : X86WriteResPairUnsupported<WriteFTestY>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : AtomWriteResPair<WriteFShuffle, [AtomPort0], [AtomPort0]>; defm : X86WriteResPairUnsupported<WriteFShuffleY>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : X86WriteResPairUnsupported<WriteFVarShuffle>; defm : X86WriteResPairUnsupported<WriteFVarShuffleY>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : X86WriteResPairUnsupported<WriteFMA>; defm : X86WriteResPairUnsupported<WriteFMAX>; defm : X86WriteResPairUnsupported<WriteFMAY>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : X86WriteResPairUnsupported<WriteDPPD>; defm : X86WriteResPairUnsupported<WriteDPPS>; defm : X86WriteResPairUnsupported<WriteDPPSY>; +defm : X86WriteResPairUnsupported<WriteDPPSZ>; defm : X86WriteResPairUnsupported<WriteFBlend>; defm : X86WriteResPairUnsupported<WriteFBlendY>; +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : X86WriteResPairUnsupported<WriteFVarBlend>; defm : X86WriteResPairUnsupported<WriteFVarBlendY>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; defm : X86WriteResPairUnsupported<WriteFShuffle256>; defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; @@ -280,30 +297,39 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; defm : AtomWriteResPair<WriteCvtSS2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; defm : AtomWriteResPair<WriteCvtPS2I, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtPS2IY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : AtomWriteResPair<WriteCvtSD2I, [AtomPort01], [AtomPort01], 8, 9, [8], [9]>; defm : AtomWriteResPair<WriteCvtPD2I, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; defm : AtomWriteResPair<WriteCvtPD2IY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; 
defm : AtomWriteResPair<WriteCvtI2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtI2PS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtI2PSY, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : AtomWriteResPair<WriteCvtI2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtI2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; defm : AtomWriteResPair<WriteCvtI2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : AtomWriteResPair<WriteCvtSS2SD, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtPS2PD, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; defm : AtomWriteResPair<WriteCvtPS2PDY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; +defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; defm : AtomWriteResPair<WriteCvtSD2SS, [AtomPort01], [AtomPort01], 6, 7, [6], [7]>; defm : AtomWriteResPair<WriteCvtPD2PS, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; defm : AtomWriteResPair<WriteCvtPD2PSY, [AtomPort01], [AtomPort01], 7, 8, [7], [8]>; +defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; defm : X86WriteResPairUnsupported<WriteCvtPH2PS>; defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>; +defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; defm : X86WriteResUnsupported<WriteCvtPS2PH>; defm : X86WriteResUnsupported<WriteCvtPS2PHSt>; defm : X86WriteResUnsupported<WriteCvtPS2PHY>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; //////////////////////////////////////////////////////////////////////////////// // Vector integer operations. 
@@ -334,42 +360,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>; defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecALUY, [AtomPort01], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : AtomWriteResPair<WriteVecLogic, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogicX, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecLogicY, [AtomPort01], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : AtomWriteResPair<WriteVecTest, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecTestY, [AtomPort01], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : AtomWriteResPair<WriteVecShift, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; defm : AtomWriteResPair<WriteVecShiftX, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; defm : AtomWriteResPair<WriteVecShiftY, [AtomPort01], [AtomPort01], 2, 3, [2], [3]>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; defm : AtomWriteResPair<WriteVecShiftImm, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>; defm : AtomWriteResPair<WriteVecShiftImmX, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>; defm : AtomWriteResPair<WriteVecShiftImmY, [AtomPort01], [AtomPort01], 1, 1, [1], [1]>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : AtomWriteResPair<WriteVecIMul, [AtomPort0], [AtomPort0], 4, 4, [4], [4]>; defm : AtomWriteResPair<WriteVecIMulX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WriteVecIMulY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; defm : AtomWriteResPair<WritePMULLD, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WritePMULLDY, [AtomPort01], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : AtomWriteResPair<WritePHMINPOS, [AtomPort0], [AtomPort0], 
5, 5, [5], [5]>; defm : AtomWriteResPair<WriteMPSAD, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteMPSADY, [AtomPort01], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WriteMPSADZ>; defm : AtomWriteResPair<WritePSADBW, [AtomPort01], [AtomPort01], 4, 4, [4], [4]>; defm : AtomWriteResPair<WritePSADBWX, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; defm : AtomWriteResPair<WritePSADBWY, [AtomPort0], [AtomPort0], 5, 5, [5], [5]>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : AtomWriteResPair<WriteShuffle, [AtomPort0], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteShuffleX, [AtomPort0], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteShuffleY, [AtomPort0], [AtomPort0], 1, 1>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : AtomWriteResPair<WriteVarShuffle, [AtomPort0], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVarShuffleX, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>; defm : AtomWriteResPair<WriteVarShuffleY, [AtomPort01], [AtomPort01], 4, 5, [4], [5]>; +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : X86WriteResPairUnsupported<WriteBlend>; defm : X86WriteResPairUnsupported<WriteBlendY>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : X86WriteResPairUnsupported<WriteVarBlend>; defm : X86WriteResPairUnsupported<WriteVarBlendY>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : X86WriteResPairUnsupported<WriteShuffle256>; defm : X86WriteResPairUnsupported<WriteVarShuffle256>; defm : X86WriteResPairUnsupported<WriteVarVecShift>; defm : X86WriteResPairUnsupported<WriteVarVecShiftY>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. 
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index e7597b4fcfb..fd41b2f7ed2 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -298,34 +298,44 @@ defm : X86WriteRes<WriteEMMS, [JFPU01, JFPX], 2, [1, 1], 1>; defm : JWriteResFpuPair<WriteFAdd, [JFPU0, JFPA], 3>; defm : JWriteResFpuPair<WriteFAddX, [JFPU0, JFPA], 3>; defm : JWriteResYMMPair<WriteFAddY, [JFPU0, JFPA], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : JWriteResFpuPair<WriteFAdd64, [JFPU0, JFPA], 3>; defm : JWriteResFpuPair<WriteFAdd64X, [JFPU0, JFPA], 3>; defm : JWriteResYMMPair<WriteFAdd64Y, [JFPU0, JFPA], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : JWriteResFpuPair<WriteFCmp, [JFPU0, JFPA], 2>; defm : JWriteResFpuPair<WriteFCmpX, [JFPU0, JFPA], 2>; defm : JWriteResYMMPair<WriteFCmpY, [JFPU0, JFPA], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : JWriteResFpuPair<WriteFCmp64, [JFPU0, JFPA], 2>; defm : JWriteResFpuPair<WriteFCmp64X, [JFPU0, JFPA], 2>; defm : JWriteResYMMPair<WriteFCmp64Y, [JFPU0, JFPA], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : JWriteResFpuPair<WriteFCom, [JFPU0, JFPA, JALU0], 3>; defm : JWriteResFpuPair<WriteFMul, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFMulX, [JFPU1, JFPM], 2>; defm : JWriteResYMMPair<WriteFMulY, [JFPU1, JFPM], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : JWriteResFpuPair<WriteFMul64, [JFPU1, JFPM], 4, [1,2]>; defm : JWriteResFpuPair<WriteFMul64X, [JFPU1, JFPM], 4, [1,2]>; defm : JWriteResYMMPair<WriteFMul64Y, [JFPU1, JFPM], 4, [2,4], 2>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : X86WriteResPairUnsupported<WriteFMA>; defm : X86WriteResPairUnsupported<WriteFMAX>; defm : X86WriteResPairUnsupported<WriteFMAY>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : JWriteResFpuPair<WriteDPPD, [JFPU1, JFPM, JFPA], 9, [1, 3, 3], 
3>; defm : JWriteResFpuPair<WriteDPPS, [JFPU1, JFPM, JFPA], 11, [1, 3, 3], 5>; defm : JWriteResYMMPair<WriteDPPSY, [JFPU1, JFPM, JFPA], 12, [2, 6, 6], 10>; +defm : X86WriteResPairUnsupported<WriteDPPSZ>; defm : JWriteResFpuPair<WriteFRcp, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFRcpX, [JFPU1, JFPM], 2>; defm : JWriteResYMMPair<WriteFRcpY, [JFPU1, JFPM], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; defm : JWriteResFpuPair<WriteFRsqrt, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFRsqrtX, [JFPU1, JFPM], 2>; defm : JWriteResYMMPair<WriteFRsqrtY, [JFPU1, JFPM], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : JWriteResFpuPair<WriteFDiv, [JFPU1, JFPM], 19, [1, 19]>; defm : JWriteResFpuPair<WriteFDivX, [JFPU1, JFPM], 19, [1, 19]>; defm : JWriteResYMMPair<WriteFDivY, [JFPU1, JFPM], 38, [2, 38], 2>; @@ -346,18 +356,25 @@ defm : JWriteResFpuPair<WriteFSqrt80, [JFPU1, JFPM], 35, [1, 35]>; defm : JWriteResFpuPair<WriteFSign, [JFPU1, JFPM], 2>; defm : JWriteResFpuPair<WriteFRnd, [JFPU1, JSTC], 3>; defm : JWriteResYMMPair<WriteFRndY, [JFPU1, JSTC], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : JWriteResFpuPair<WriteFLogic, [JFPU01, JFPX], 1>; defm : JWriteResYMMPair<WriteFLogicY, [JFPU01, JFPX], 1, [2, 2], 2>; +defm : X86WriteResPairUnsupported<WriteFLogicZ>; defm : JWriteResFpuPair<WriteFTest, [JFPU0, JFPA, JALU0], 3>; defm : JWriteResYMMPair<WriteFTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : JWriteResFpuPair<WriteFShuffle, [JFPU01, JFPX], 1>; defm : JWriteResYMMPair<WriteFShuffleY, [JFPU01, JFPX], 1, [2, 2], 2>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : JWriteResFpuPair<WriteFVarShuffle, [JFPU01, JFPX], 2, [1, 4], 3>; defm : JWriteResYMMPair<WriteFVarShuffleY,[JFPU01, JFPX], 3, [2, 6], 6>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : JWriteResFpuPair<WriteFBlend, [JFPU01, JFPX], 1>; defm : 
JWriteResYMMPair<WriteFBlendY, [JFPU01, JFPX], 1, [2, 2], 2>; +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>; defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>; defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; @@ -368,33 +385,42 @@ defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; defm : JWriteResFpuPair<WriteCvtSS2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>; defm : JWriteResFpuPair<WriteCvtPS2I, [JFPU1, JSTC], 3, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtPS2IY, [JFPU1, JSTC], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : JWriteResFpuPair<WriteCvtSD2I, [JFPU1, JSTC, JFPA, JALU0], 7, [1,1,1,1], 2>; defm : JWriteResFpuPair<WriteCvtPD2I, [JFPU1, JSTC], 3, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtPD2IY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; // FIXME: f+3 ST, LD+STC latency defm : JWriteResFpuPair<WriteCvtI2SS, [JFPU1, JSTC], 9, [1,1], 2>; defm : JWriteResFpuPair<WriteCvtI2PS, [JFPU1, JSTC], 3, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtI2PSY, [JFPU1, JSTC], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : JWriteResFpuPair<WriteCvtI2SD, [JFPU1, JSTC], 9, [1,1], 2>; defm : JWriteResFpuPair<WriteCvtI2PD, [JFPU1, JSTC], 3, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtI2PDY, [JFPU1, JSTC], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : JWriteResFpuPair<WriteCvtSS2SD, [JFPU1, JSTC], 7, [1,2], 2>; defm : JWriteResFpuPair<WriteCvtPS2PD, [JFPU1, JSTC], 2, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtPS2PDY, [JFPU1, JSTC], 2, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; defm : JWriteResFpuPair<WriteCvtSD2SS, [JFPU1, JSTC], 7, [1,2], 2>; defm : JWriteResFpuPair<WriteCvtPD2PS, [JFPU1, JSTC], 3, [1,1], 1>; defm : 
JWriteResYMMPair<WriteCvtPD2PSY, [JFPU1, JSTC, JFPX], 6, [2,2,4], 3>; +defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; defm : JWriteResFpuPair<WriteCvtPH2PS, [JFPU1, JSTC], 3, [1,1], 1>; defm : JWriteResYMMPair<WriteCvtPH2PSY, [JFPU1, JSTC], 3, [2,2], 2>; +defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; defm : X86WriteRes<WriteCvtPS2PH, [JFPU1, JSTC], 3, [1,1], 1>; defm : X86WriteRes<WriteCvtPS2PHY, [JFPU1, JSTC, JFPX], 6, [2,2,2], 3>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; defm : X86WriteRes<WriteCvtPS2PHSt, [JFPU1, JSTC, JSAGU], 4, [1,1,1], 1>; defm : X86WriteRes<WriteCvtPS2PHYSt, [JFPU1, JSTC, JFPX, JSAGU], 7, [2,2,2,1], 3>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; //////////////////////////////////////////////////////////////////////////////// // Vector integer operations. @@ -425,42 +451,56 @@ defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>; defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecALUY, [JFPU01, JVALU], 1>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : JWriteResFpuPair<WriteVecShift, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShiftX, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShiftY, [JFPU01, JVALU], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; defm : JWriteResFpuPair<WriteVecShiftImm, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShiftImmX,[JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecShiftImmY,[JFPU01, JVALU], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : JWriteResFpuPair<WriteVecIMul, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WriteVecIMulX, [JFPU0, JVIMUL], 2>; defm : JWriteResFpuPair<WriteVecIMulY, [JFPU0, JVIMUL], 2>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; defm : JWriteResFpuPair<WritePMULLD, [JFPU0, JFPU01, JVIMUL, JVALU], 4, [2, 1, 2, 1], 3>; defm : JWriteResFpuPair<WritePMULLDY, [JFPU0, JFPU01, JVIMUL, JVALU], 
4, [2, 1, 2, 1], 3>; +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : JWriteResFpuPair<WriteMPSAD, [JFPU0, JVIMUL], 3, [1, 2]>; defm : JWriteResFpuPair<WriteMPSADY, [JFPU0, JVIMUL], 3, [1, 2]>; +defm : X86WriteResPairUnsupported<WriteMPSADZ>; defm : JWriteResFpuPair<WritePSADBW, [JFPU01, JVALU], 2>; defm : JWriteResFpuPair<WritePSADBWX, [JFPU01, JVALU], 2>; defm : JWriteResFpuPair<WritePSADBWY, [JFPU01, JVALU], 2>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : JWriteResFpuPair<WritePHMINPOS, [JFPU0, JVALU], 2>; defm : JWriteResFpuPair<WriteShuffle, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteShuffleX, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteShuffleY, [JFPU01, JVALU], 1>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : JWriteResFpuPair<WriteVarShuffle, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteVarShuffleX, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteVarShuffleY, [JFPU01, JVALU], 2, [1, 4], 3>; +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : JWriteResFpuPair<WriteBlend, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteBlendY, [JFPU01, JVALU], 1>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : JWriteResFpuPair<WriteVarBlend, [JFPU01, JVALU], 2, [1, 4], 3>; defm : JWriteResFpuPair<WriteVarBlendY, [JFPU01, JVALU], 2, [1, 4], 3>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : JWriteResFpuPair<WriteVecLogic, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecLogicX, [JFPU01, JVALU], 1>; defm : X86WriteResPairUnsupported<WriteVecLogicY>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : JWriteResFpuPair<WriteVecTest, [JFPU0, JFPA, JALU0], 3>; -defm : JWriteResYMMPair<WriteVecTestY , [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>; +defm : JWriteResYMMPair<WriteVecTestY, [JFPU01, JFPX, JFPA, JALU0], 4, [2, 2, 2, 1], 3>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : JWriteResFpuPair<WriteShuffle256, [JFPU01, JVALU], 1>; defm : 
X86WriteResPairUnsupported<WriteVarShuffle256>; defm : X86WriteResPairUnsupported<WriteVarVecShift>; defm : X86WriteResPairUnsupported<WriteVarVecShiftY>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; //////////////////////////////////////////////////////////////////////////////// // Vector insert/extract operations. @@ -555,7 +595,7 @@ def JWriteZeroLatency : SchedWriteRes<[]> { let Latency = 0; } -// Certain instructions that use the same register for both source +// Certain instructions that use the same register for both source // operands do not have a real dependency on the previous contents of the // register, and thus, do not have to wait before completing. They can be // optimized out at register renaming stage. diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index b8386972de0..1ac3123cfcc 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -159,22 +159,28 @@ defm : X86WriteRes<WriteEMMS, [SLM_FPC_RSV01], 10, [10], 9>; defm : SLMWriteResPair<WriteFAdd, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCmpY, [SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : SLMWriteResPair<WriteFCmp64, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCmp64X, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCmp64Y, [SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : SLMWriteResPair<WriteFCom, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFMul, 
[SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>; defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; defm : SLMWriteResPair<WriteFDivY, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; @@ -186,9 +192,11 @@ defm : X86WriteResPairUnsupported<WriteFDiv64Z>; defm : SLMWriteResPair<WriteFRcp, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRcpX, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRcpY, [SLM_FPC_RSV0], 5>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; defm : SLMWriteResPair<WriteFRsqrt, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRsqrtX, [SLM_FPC_RSV0], 5>; defm : SLMWriteResPair<WriteFRsqrtY, [SLM_FPC_RSV0], 5>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : SLMWriteResPair<WriteFSqrt, [SLM_FPC_RSV0,SLMFPDivider], 20, [1,20], 1, 3>; defm : SLMWriteResPair<WriteFSqrtX, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>; defm : SLMWriteResPair<WriteFSqrtY, [SLM_FPC_RSV0,SLMFPDivider], 41, [1,40], 1, 3>; @@ -201,40 +209,52 @@ defm : SLMWriteResPair<WriteFSqrt80, [SLM_FPC_RSV0,SLMFPDivider], 40, [1,40]>; defm : SLMWriteResPair<WriteDPPD, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteDPPS, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteDPPSY, [SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteDPPSZ>; defm : SLMWriteResPair<WriteFSign, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFRnd, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFRndY, 
[SLM_FPC_RSV1], 3>; +defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : SLMWriteResPair<WriteFLogic, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFLogicY, [SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WriteFLogicZ>; defm : SLMWriteResPair<WriteFTest, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteFTestY, [SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : SLMWriteResPair<WriteFShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFShuffleY, [SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : SLMWriteResPair<WriteFVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteFVarShuffleY,[SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : SLMWriteResPair<WriteFBlend, [SLM_FPC_RSV0], 1>; // Conversion between integer and float. defm : SLMWriteResPair<WriteCvtSS2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPS2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPS2IY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : SLMWriteResPair<WriteCvtSD2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPD2I, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPD2IY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; defm : SLMWriteResPair<WriteCvtI2SS, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2PS, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2PSY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : SLMWriteResPair<WriteCvtI2SD, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2PD, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtI2PDY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : SLMWriteResPair<WriteCvtSS2SD, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPS2PD, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPS2PDY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtPS2PDZ>; defm : 
SLMWriteResPair<WriteCvtSD2SS, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPD2PS, [SLM_FPC_RSV01], 4>; defm : SLMWriteResPair<WriteCvtPD2PSY, [SLM_FPC_RSV01], 4>; +defm : X86WriteResPairUnsupported<WriteCvtPD2PSZ>; // Vector integer operations. def : WriteRes<WriteVecLoad, [SLM_MEC_RSV]> { let Latency = 3; } @@ -260,37 +280,49 @@ def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>; defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftY, [SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; defm : SLMWriteResPair<WriteVecShiftImm, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftImmX,[SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftImmY,[SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : SLMWriteResPair<WriteVecLogic, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecLogicX,[SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecLogicY,[SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : SLMWriteResPair<WriteVecTest, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecTestY, [SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : SLMWriteResPair<WriteVecALU, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecALUX, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WriteVecALUY, [SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : SLMWriteResPair<WriteVecIMul, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WriteVecIMulX, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WriteVecIMulY, [SLM_FPC_RSV0], 4>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; // FIXME: The below is closer to correct, but caused some perf regressions. 
//defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 11, [11], 7>; defm : SLMWriteResPair<WritePMULLD, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WritePMULLDY, [SLM_FPC_RSV0], 4>; +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : SLMWriteResPair<WriteShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteShuffleY, [SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : SLMWriteResPair<WriteShuffleX, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarShuffle, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarShuffleX, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVarShuffleY, [SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : SLMWriteResPair<WriteBlend, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteBlendY, [SLM_FPC_RSV0], 1>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : SLMWriteResPair<WriteMPSAD, [SLM_FPC_RSV0], 7>; defm : SLMWriteResPair<WriteMPSADY, [SLM_FPC_RSV0], 7>; +defm : X86WriteResPairUnsupported<WriteMPSADZ>; defm : SLMWriteResPair<WritePSADBW, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WritePSADBWX, [SLM_FPC_RSV0], 4>; defm : SLMWriteResPair<WritePSADBWY, [SLM_FPC_RSV0], 4>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : SLMWriteResPair<WritePHMINPOS, [SLM_FPC_RSV0], 4>; // Vector insert/extract operations. @@ -309,9 +341,11 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> { defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>; defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>; +defm : X86WriteResPairUnsupported<WriteFHAddZ>; defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WritePHAddY, [SLM_FPC_RSV01], 1>; +defm : X86WriteResPairUnsupported<WritePHAddZ>; // String instructions. // Packed Compare Implicit Length Strings, Return Mask @@ -407,25 +441,33 @@ def : WriteRes<WriteNop, []>; // scheduling resources anyway. 
def : WriteRes<WriteIMulH, [SLM_FPC_RSV0]>; defm : X86WriteResPairUnsupported<WriteFBlendY>; +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>; defm : X86WriteResPairUnsupported<WriteVarBlendY>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>; defm : X86WriteResPairUnsupported<WriteFVarBlendY>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; defm : X86WriteResPairUnsupported<WriteFShuffle256>; defm : X86WriteResPairUnsupported<WriteFVarShuffle256>; defm : X86WriteResPairUnsupported<WriteShuffle256>; defm : X86WriteResPairUnsupported<WriteVarShuffle256>; defm : SLMWriteResPair<WriteVarVecShift, [SLM_FPC_RSV0], 1>; defm : X86WriteResPairUnsupported<WriteVarVecShiftY>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; defm : X86WriteResPairUnsupported<WriteFMA>; defm : X86WriteResPairUnsupported<WriteFMAX>; defm : X86WriteResPairUnsupported<WriteFMAY>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : X86WriteResPairUnsupported<WriteCvtPH2PS>; defm : X86WriteResPairUnsupported<WriteCvtPH2PSY>; +defm : X86WriteResPairUnsupported<WriteCvtPH2PSZ>; defm : X86WriteResUnsupported<WriteCvtPS2PH>; defm : X86WriteResUnsupported<WriteCvtPS2PHY>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; defm : X86WriteResUnsupported<WriteCvtPS2PHSt>; defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; } // SchedModel diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index df496e230ec..6e6fe146e78 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -212,34 +212,45 @@ defm : X86WriteRes<WriteFMoveY, [ZnFPU], 1, [1], 1>; defm : ZnWriteResFpuPair<WriteFAdd, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAddX, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAddY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFAddZ>; 
defm : ZnWriteResFpuPair<WriteFAdd64, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAdd64X, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFAdd64Y, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : ZnWriteResFpuPair<WriteFCmp, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCmpX, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCmpY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFCmpZ>; defm : ZnWriteResFpuPair<WriteFCmp64, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCmp64X, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFCmp64Y, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WriteFCmp64Z>; defm : ZnWriteResFpuPair<WriteFCom, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WriteFBlend, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteFBlendY, [ZnFPU01], 1>; +defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : ZnWriteResFpuPair<WriteFVarBlend, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteFVarBlendY,[ZnFPU01], 1>; +defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; defm : ZnWriteResFpuPair<WriteVarBlend, [ZnFPU0], 1>; defm : ZnWriteResFpuPair<WriteVarBlendY, [ZnFPU0], 1>; +defm : X86WriteResPairUnsupported<WriteVarBlendZ>; defm : ZnWriteResFpuPair<WriteCvtSS2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtPS2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtPS2IY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtPS2IZ>; defm : ZnWriteResFpuPair<WriteCvtSD2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtPD2I, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtPD2IY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtPD2IZ>; defm : ZnWriteResFpuPair<WriteCvtI2SS, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtI2PS, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtI2PSY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtI2PSZ>; defm : ZnWriteResFpuPair<WriteCvtI2SD, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtI2PD, [ZnFPU3], 5>; defm : ZnWriteResFpuPair<WriteCvtI2PDY, [ZnFPU3], 5>; +defm : X86WriteResPairUnsupported<WriteCvtI2PDZ>; defm : 
ZnWriteResFpuPair<WriteFDiv, [ZnFPU3], 15>; defm : ZnWriteResFpuPair<WriteFDivX, [ZnFPU3], 15>; //defm : ZnWriteResFpuPair<WriteFDivY, [ZnFPU3], 15>; @@ -251,29 +262,39 @@ defm : X86WriteResPairUnsupported<WriteFDiv64Z>; defm : ZnWriteResFpuPair<WriteFSign, [ZnFPU3], 2>; defm : ZnWriteResFpuPair<WriteFRnd, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? defm : ZnWriteResFpuPair<WriteFRndY, [ZnFPU3], 4, [1], 1, 7, 1>; // FIXME: Should folds require 1 extra uops? +defm : X86WriteResPairUnsupported<WriteFRndZ>; defm : ZnWriteResFpuPair<WriteFLogic, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteFLogicY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteFLogicZ>; defm : ZnWriteResFpuPair<WriteFTest, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteFTestY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteFTestZ>; defm : ZnWriteResFpuPair<WriteFShuffle, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFShuffleY, [ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteFShuffleZ>; defm : ZnWriteResFpuPair<WriteFVarShuffle, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteFVarShuffleY,[ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteFVarShuffleZ>; defm : ZnWriteResFpuPair<WriteFMul, [ZnFPU01], 3, [1], 1, 7, 1>; defm : ZnWriteResFpuPair<WriteFMulX, [ZnFPU01], 3, [1], 1, 7, 1>; defm : ZnWriteResFpuPair<WriteFMulY, [ZnFPU01], 4, [1], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : ZnWriteResFpuPair<WriteFMul64, [ZnFPU01], 3, [1], 1, 7, 1>; defm : ZnWriteResFpuPair<WriteFMul64X, [ZnFPU01], 3, [1], 1, 7, 1>; defm : ZnWriteResFpuPair<WriteFMul64Y, [ZnFPU01], 4, [1], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : ZnWriteResFpuPair<WriteFMA, [ZnFPU03], 5>; defm : ZnWriteResFpuPair<WriteFMAX, [ZnFPU03], 5>; defm : ZnWriteResFpuPair<WriteFMAY, [ZnFPU03], 5>; +defm : X86WriteResPairUnsupported<WriteFMAZ>; defm : ZnWriteResFpuPair<WriteFRcp, [ZnFPU01], 5>; defm : ZnWriteResFpuPair<WriteFRcpX, [ZnFPU01], 5>; defm : 
ZnWriteResFpuPair<WriteFRcpY, [ZnFPU01], 5, [1], 1, 7, 2>; +defm : X86WriteResPairUnsupported<WriteFRcpZ>; //defm : ZnWriteResFpuPair<WriteFRsqrt, [ZnFPU02], 5>; defm : ZnWriteResFpuPair<WriteFRsqrtX, [ZnFPU01], 5, [1], 1, 7, 1>; //defm : ZnWriteResFpuPair<WriteFRsqrtY, [ZnFPU01], 5, [2], 2>; +defm : X86WriteResPairUnsupported<WriteFRsqrtZ>; defm : ZnWriteResFpuPair<WriteFSqrt, [ZnFPU3], 20, [20]>; defm : ZnWriteResFpuPair<WriteFSqrtX, [ZnFPU3], 20, [20]>; defm : ZnWriteResFpuPair<WriteFSqrtY, [ZnFPU3], 28, [28], 1, 7, 1>; @@ -309,40 +330,52 @@ defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>; defm : ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecShiftX, [ZnFPU2], 1>; defm : ZnWriteResFpuPair<WriteVecShiftY, [ZnFPU2], 2>; +defm : X86WriteResPairUnsupported<WriteVecShiftZ>; defm : ZnWriteResFpuPair<WriteVecShiftImm, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecShiftImmX, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecShiftImmY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecShiftImmZ>; defm : ZnWriteResFpuPair<WriteVecLogic, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecLogicX, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecLogicY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecLogicZ>; defm : ZnWriteResFpuPair<WriteVecTest, [ZnFPU12], 1, [2], 1, 7, 1>; defm : ZnWriteResFpuPair<WriteVecTestY, [ZnFPU12], 1, [2], 1, 7, 1>; +defm : X86WriteResPairUnsupported<WriteVecTestZ>; defm : ZnWriteResFpuPair<WriteVecALU, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecALUX, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVecALUY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVecALUZ>; defm : ZnWriteResFpuPair<WriteVecIMul, [ZnFPU0], 4>; defm : ZnWriteResFpuPair<WriteVecIMulX, [ZnFPU0], 4>; defm : ZnWriteResFpuPair<WriteVecIMulY, [ZnFPU0], 4>; +defm : X86WriteResPairUnsupported<WriteVecIMulZ>; defm : ZnWriteResFpuPair<WritePMULLD, [ZnFPU0], 4, [1], 1, 7, 1>; // FIXME defm : ZnWriteResFpuPair<WritePMULLDY, [ZnFPU0], 5, [2], 1, 7, 
1>; // FIXME +defm : X86WriteResPairUnsupported<WritePMULLDZ>; defm : ZnWriteResFpuPair<WriteShuffle, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteShuffleX, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteShuffleY, [ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteShuffleZ>; defm : ZnWriteResFpuPair<WriteVarShuffle, [ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVarShuffleX,[ZnFPU], 1>; defm : ZnWriteResFpuPair<WriteVarShuffleY,[ZnFPU], 1>; +defm : X86WriteResPairUnsupported<WriteVarShuffleZ>; defm : ZnWriteResFpuPair<WriteBlend, [ZnFPU01], 1>; defm : ZnWriteResFpuPair<WriteBlendY, [ZnFPU01], 1>; +defm : X86WriteResPairUnsupported<WriteBlendZ>; defm : ZnWriteResFpuPair<WriteShuffle256, [ZnFPU], 2>; defm : ZnWriteResFpuPair<WriteVarShuffle256, [ZnFPU], 2>; defm : ZnWriteResFpuPair<WritePSADBW, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WritePSADBWX, [ZnFPU0], 3>; defm : ZnWriteResFpuPair<WritePSADBWY, [ZnFPU0], 3>; +defm : X86WriteResPairUnsupported<WritePSADBWZ>; defm : ZnWriteResFpuPair<WritePHMINPOS, [ZnFPU0], 4>; // Vector Shift Operations defm : ZnWriteResFpuPair<WriteVarVecShift, [ZnFPU12], 1>; defm : ZnWriteResFpuPair<WriteVarVecShiftY, [ZnFPU12], 1>; +defm : X86WriteResPairUnsupported<WriteVarVecShiftZ>; // Vector insert/extract operations. defm : ZnWriteResFpuPair<WriteVecInsert, [ZnFPU], 1>; @@ -1144,9 +1177,10 @@ def ZnWriteCVTPD2PSYr: SchedWriteRes<[ZnFPU3]> { // CVTPD2PS. // x,x. def : SchedAlias<WriteCvtPD2PS, ZnWriteCVTPD2PSr>; - // y,y. def : SchedAlias<WriteCvtPD2PSY, ZnWriteCVTPD2PSYr>; +// z,z. +defm : X86WriteResUnsupported<WriteCvtPD2PSZ>; def ZnWriteCVTPD2PSLd: SchedWriteRes<[ZnAGU,ZnFPU03]> { let Latency = 11; @@ -1161,6 +1195,8 @@ def ZnWriteCVTPD2PSYLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { let Latency = 11; } def : SchedAlias<WriteCvtPD2PSYLd, ZnWriteCVTPD2PSYLd>; +// z,m512 +defm : X86WriteResUnsupported<WriteCvtPD2PSZLd>; // CVTSD2SS. // x,x. 
@@ -1185,12 +1221,14 @@ def ZnWriteCVTPS2PDLd : SchedWriteRes<[ZnAGU, ZnFPU3]> { } def : SchedAlias<WriteCvtPS2PDLd, ZnWriteCVTPS2PDLd>; def : SchedAlias<WriteCvtPS2PDYLd, ZnWriteCVTPS2PDLd>; +defm : X86WriteResUnsupported<WriteCvtPS2PDZLd>; // y,x. def ZnWriteVCVTPS2PDY : SchedWriteRes<[ZnFPU3]> { let Latency = 3; } def : SchedAlias<WriteCvtPS2PDY, ZnWriteVCVTPS2PDY>; +defm : X86WriteResUnsupported<WriteCvtPS2PDZ>; // CVTSS2SD. // x,x. @@ -1288,17 +1326,21 @@ def : InstRW<[ZnWriteCVSTSI2SILd], (instregex "(V?)CVT(T?)SD2SI(64)?rm")>; // x,v,i. def : SchedAlias<WriteCvtPS2PH, ZnWriteMicrocoded>; def : SchedAlias<WriteCvtPS2PHY, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZ>; // m,v,i. def : SchedAlias<WriteCvtPS2PHSt, ZnWriteMicrocoded>; def : SchedAlias<WriteCvtPS2PHYSt, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>; // VCVTPH2PS. // v,x. def : SchedAlias<WriteCvtPH2PS, ZnWriteMicrocoded>; def : SchedAlias<WriteCvtPH2PSY, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZ>; // v,m. 
def : SchedAlias<WriteCvtPH2PSLd, ZnWriteMicrocoded>; def : SchedAlias<WriteCvtPH2PSYLd, ZnWriteMicrocoded>; +defm : X86WriteResUnsupported<WriteCvtPH2PSZLd>; //-- SSE4A instructions --// // EXTRQ diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 6c137846dd9..7ec84256a9f 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -42,7 +42,7 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -96,7 +96,7 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_addps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -150,7 +150,7 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; ; SKX-LABEL: test_addsubpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -205,7 +205,7 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; ; SKX-LABEL: test_addsubps: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -267,7 +267,7 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vandnpd 
%ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotpd: @@ -336,7 +336,7 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX: # %bb.0: ; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andnotps: @@ -405,7 +405,7 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX: # %bb.0: ; SKX-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andpd: @@ -472,7 +472,7 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKX: # %bb.0: ; SKX-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_andps: @@ -538,7 +538,7 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -602,7 +602,7 @@ 
define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] ; SKX-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],mem[2],ymm1[3],mem[4,5,6],ymm1[7] sched: [8:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_blendps: @@ -956,7 +956,7 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1022,7 +1022,7 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1090,7 +1090,7 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [7:1.00] ; SKX-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [13:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: @@ -1153,9 +1153,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [11:0.50] -; 
SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: @@ -1217,7 +1217,7 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1281,7 +1281,7 @@ define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1406,7 +1406,7 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtps2dq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vcvtps2dq (%rdi), %ymm1 # sched: [11:0.50] ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1470,7 +1470,7 @@ define <8 x i32> @test_cvttps2dq(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:0.50] ; SKX-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [11:0.50] ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1979,7 +1979,7 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SKX: # %bb.0: ; SKX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vinsertf128 $1, 
(%rdi), %ymm0, %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_insertf128: @@ -2334,7 +2334,7 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2389,7 +2389,7 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_maxps: ; SKX: # %bb.0: -; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2444,7 +2444,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: -; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2499,7 +2499,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_minps: ; SKX: # %bb.0: -; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2560,7 +2560,7 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %ymm0, (%rsi) # 
sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2622,7 +2622,7 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SKX-LABEL: test_movaps: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2685,7 +2685,7 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] ; SKX-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movddup: @@ -2912,7 +2912,7 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2965,7 +2965,7 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; ; SKX-LABEL: test_movntps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3025,7 +3025,7 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] ; SKX-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: 
test_movshdup: @@ -3088,7 +3088,7 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] ; SKX-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_movsldup: @@ -3152,7 +3152,7 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3216,7 +3216,7 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SKX-LABEL: test_movups: ; SKX: # %bb.0: ; SKX-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3272,7 +3272,7 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3326,7 +3326,7 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_mulps: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3387,7 +3387,7 @@ define <4 x 
double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) ; SKX: # %bb.0: ; SKX-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: orpd: @@ -3454,7 +3454,7 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKX: # %bb.0: ; SKX-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_orps: @@ -3521,7 +3521,7 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou ; SKX: # %bb.0: ; SKX-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [3:1.00] ; SKX-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_perm2f128: @@ -3584,7 +3584,7 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] ; SKX-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd: @@ -3647,7 +3647,7 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; SKX-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, 
%ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: @@ -3710,7 +3710,7 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] ; SKX-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps: @@ -3773,7 +3773,7 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; SKX-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_permilps_ymm: @@ -4056,7 +4056,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rcpps: @@ -4118,9 +4118,9 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:0.67] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [15:1.00] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundpd: @@ -4182,9 +4182,9 @@ define <8 x float> @test_roundps(<8 x float> %a0, 
<8 x float> *%a1) { ; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:0.67] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [8:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [15:1.00] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_roundps: @@ -4248,7 +4248,7 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [4:1.00] ; SKX-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_rsqrtps: @@ -4312,7 +4312,7 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] ; SKX-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufpd: @@ -4375,7 +4375,7 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SKX: # %bb.0: ; SKX-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] ; SKX-NEXT: vshufps {{.*#+}} ymm1 = ymm1[0,3],mem[0,0],ymm1[4,7],mem[4,4] sched: [8:1.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_shufps: @@ -4438,7 +4438,7 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [18:12.00] ; 
SKX-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [25:12.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtpd: @@ -4502,7 +4502,7 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vsqrtps %ymm0, %ymm0 # sched: [12:6.00] ; SKX-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:6.00] -; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_sqrtps: @@ -4559,7 +4559,7 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: -; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4613,7 +4613,7 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; ; SKX-LABEL: test_subps: ; SKX: # %bb.0: -; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5008,7 +5008,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SKX-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpckhpd: @@ -5125,7 +5125,7 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] ; SKX-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_unpcklpd: @@ -5242,7 +5242,7 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorpd: @@ -5309,7 +5309,7 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SKX: # %bb.0: ; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SKX-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [8:0.50] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_xorps: diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index 4bfbf1ec208..1bfe60e3104 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -76,7 +76,7 @@ define <4 x double> @test_broadcastsd_ymm(<2 x double> %a0) { ; SKX-LABEL: test_broadcastsd_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastsd %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastsd_ymm: @@ -117,7 +117,7 @@ define <4 x float> @test_broadcastss(<4 x float> %a0) { ; SKX-LABEL: test_broadcastss: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastss %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: 
vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss: @@ -158,7 +158,7 @@ define <8 x float> @test_broadcastss_ymm(<4 x float> %a0) { ; SKX-LABEL: test_broadcastss_ymm: ; SKX: # %bb.0: ; SKX-NEXT: vbroadcastss %xmm0, %ymm0 # sched: [3:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_broadcastss_ymm: @@ -2634,7 +2634,7 @@ define <4 x double> @test_permpd(<4 x double> %a0, <4 x double> *%a1) { ; SKX: # %bb.0: ; SKX-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,2,2,3] sched: [3:1.00] ; SKX-NEXT: vpermpd {{.*#+}} ymm1 = mem[0,2,2,3] sched: [10:1.00] -; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permpd: @@ -2683,7 +2683,7 @@ define <8 x float> @test_permps(<8 x i32> %a0, <8 x float> %a1, <8 x float> *%a2 ; SKX: # %bb.0: ; SKX-NEXT: vpermps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vpermps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] -; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_permps: @@ -3320,7 +3320,7 @@ define <16 x i16> @test_pmaddubsw(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddubsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3364,7 +3364,7 @@ define <8 x i32> @test_pmaddwd(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddwd 
(%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4761,7 +4761,7 @@ define <4 x i64> @test_pmuldq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmuldq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4805,7 +4805,7 @@ define <16 x i16> @test_pmulhrsw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2 ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhrsw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4848,7 +4848,7 @@ define <16 x i16> @test_pmulhuw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhuw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4891,7 +4891,7 @@ define <16 x i16> @test_pmulhw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4934,8 +4934,8 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:0.67] -; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:0.67] +; SKX-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:1.00] +; SKX-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [17:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; ZNVER1-LABEL: test_pmulld: @@ -4976,7 
+4976,7 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: -; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5018,7 +5018,7 @@ define <4 x i64> @test_pmuludq(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq %ymm1, %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: vpmuludq (%rdi), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index aeabf2f7cea..28af00ae3b6 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -12,7 +12,7 @@ define <8 x double> @addpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: addpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <8 x double> %x, %y @@ -42,7 +42,7 @@ define <16 x float> @addps512(<16 x float> %y, <16 x float> %x) { ; ; SKX-LABEL: addps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %add.i = fadd <16 x float> %x, %y @@ -72,7 +72,7 @@ define <8 x double> @subpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: subpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <8 x double> %x, %y @@ -103,7 +103,7 @@ define <16 x float> @subps512(<16 x float> %y, <16 x 
float> %x) { ; ; SKX-LABEL: subps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %sub.i = fsub <16 x float> %x, %y @@ -134,7 +134,7 @@ define <8 x i64> @imulq512(<8 x i64> %y, <8 x i64> %x) { ; ; SKX-LABEL: imulq512: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %zmm0, %zmm1, %zmm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <8 x i64>%x, %y ret <8 x i64>%z @@ -148,7 +148,7 @@ define <4 x i64> @imulq256(<4 x i64> %y, <4 x i64> %x) { ; ; SKX-LABEL: imulq256: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %ymm0, %ymm1, %ymm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <4 x i64>%x, %y ret <4 x i64>%z @@ -162,7 +162,7 @@ define <2 x i64> @imulq128(<2 x i64> %y, <2 x i64> %x) { ; ; SKX-LABEL: imulq128: ; SKX: # %bb.0: -; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SKX-NEXT: vpmullq %xmm0, %xmm1, %xmm0 # sched: [12:1.50] ; SKX-NEXT: retq # sched: [7:1.00] %z = mul <2 x i64>%x, %y ret <2 x i64>%z @@ -176,7 +176,7 @@ define <8 x double> @mulpd512(<8 x double> %y, <8 x double> %x) { ; ; SKX-LABEL: mulpd512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <8 x double> %x, %y @@ -206,7 +206,7 @@ define <16 x float> @mulps512(<16 x float> %y, <16 x float> %x) { ; ; SKX-LABEL: mulps512: ; SKX: # %bb.0: # %entry -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %mul.i = fmul <16 x float> %x, %y @@ -543,7 +543,7 @@ define <16 x i32> @vpmulld_test(<16 x i32> %i, <16 x i32> %j) { ; ; SKX-LABEL: vpmulld_test: ; SKX: # %bb.0: -; SKX-NEXT: 
vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:0.67] +; SKX-NEXT: vpmulld %zmm1, %zmm0, %zmm0 # sched: [10:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %x = mul <16 x i32> %i, %j ret <16 x i32> %x @@ -712,7 +712,7 @@ define <16 x float> @test_mask_vaddps(<16 x float> %dst, <16 x float> %i, ; SKX-LABEL: test_mask_vaddps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] <16 x float> %j, <16 x i32> %mask1) nounwind readnone { @@ -732,7 +732,7 @@ define <16 x float> @test_mask_vmulps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vmulps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fmul <16 x float> %i, %j @@ -750,7 +750,7 @@ define <16 x float> @test_mask_vminps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vminps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vminps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <16 x float> %i, %j @@ -769,7 +769,7 @@ define <8 x double> @test_mask_vminpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vminpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vminpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp olt <8 x double> %i, %j @@ -788,7 +788,7 @@ define <16 
x float> @test_mask_vmaxps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vmaxps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmaxps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <16 x float> %i, %j @@ -807,7 +807,7 @@ define <8 x double> @test_mask_vmaxpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vmaxpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %ymm3, %ymm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vmaxpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i32> %mask1, zeroinitializer %cmp_res = fcmp ogt <8 x double> %i, %j @@ -826,7 +826,7 @@ define <16 x float> @test_mask_vsubps(<16 x float> %dst, <16 x float> %i, <16 x ; SKX-LABEL: test_mask_vsubps: ; SKX: # %bb.0: ; SKX-NEXT: vptestmd %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <16 x i32> %mask1, zeroinitializer %x = fsub <16 x float> %i, %j @@ -862,7 +862,7 @@ define <8 x double> @test_mask_vaddpd(<8 x double> %dst, <8 x double> %i, <8 x d ; SKX-LABEL: test_mask_vaddpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmq %zmm3, %zmm3, %k1 # sched: [3:1.00] -; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j @@ -880,7 +880,7 @@ define <8 x double> @test_maskz_vaddpd(<8 x double> %i, <8 x double> %j, <8 x i6 ; SKX-LABEL: test_maskz_vaddpd: ; SKX: # %bb.0: ; SKX-NEXT: vptestmq %zmm2, %zmm2, %k1 # sched: [3:1.00] -; SKX-NEXT: 
vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.33] +; SKX-NEXT: vaddpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <8 x i64> %mask1, zeroinitializer %x = fadd <8 x double> %i, %j @@ -1071,10 +1071,10 @@ define double @test1(double %a, double %b) nounwind { ; SKX-NEXT: jne .LBB64_1 # sched: [1:0.50] ; SKX-NEXT: jnp .LBB64_2 # sched: [1:0.50] ; SKX-NEXT: .LBB64_1: # %l1 -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB64_2: # %l2 -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -1104,10 +1104,10 @@ define float @test2(float %a, float %b) nounwind { ; SKX-NEXT: vucomiss %xmm0, %xmm1 # sched: [2:1.00] ; SKX-NEXT: jbe .LBB65_2 # sched: [1:0.50] ; SKX-NEXT: # %bb.1: # %l1 -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; SKX-NEXT: .LBB65_2: # %l2 -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ -1338,7 +1338,7 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind { ; ; SKX-LABEL: sitof32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -1352,7 +1352,7 @@ define <8 x double> @sltof864(<8 x i64> %a) { ; ; SKX-LABEL: sltof864: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] 
%b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -1366,7 +1366,7 @@ define <4 x double> @slto4f64(<4 x i64> %a) { ; ; SKX-LABEL: slto4f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <4 x i64> %a to <4 x double> ret <4 x double> %b @@ -1380,7 +1380,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) { ; ; SKX-LABEL: slto2f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <2 x i64> %a to <2 x double> ret <2 x double> %b @@ -1423,7 +1423,7 @@ define <4 x i64> @f64to4sl(<4 x double> %a) { ; ; SKX-LABEL: f64to4sl: ; SKX: # %bb.0: -; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttpd2qq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi <4 x double> %a to <4 x i64> ret <4 x i64> %b @@ -1483,7 +1483,7 @@ define <8 x double> @ulto8f64(<8 x i64> %a) { ; ; SKX-LABEL: ulto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -1498,8 +1498,8 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; ; SKX-LABEL: ulto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; SKX-NEXT: vcvtuqq2pd %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtuqq2pd %zmm1, %zmm1 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i64> %a to <16 x double> ret <16 x double> %b @@ -1513,7 +1513,7 @@ define <16 x i32> @f64to16si(<16 x float> %a) nounwind { ; ; SKX-LABEL: f64to16si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # 
sched: [7:1.00] %b = fptosi <16 x float> %a to <16 x i32> ret <16 x i32> %b @@ -1527,7 +1527,7 @@ define <16 x i32> @f32to16ui(<16 x float> %a) nounwind { ; ; SKX-LABEL: f32to16ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <16 x float> %a to <16 x i32> ret <16 x i32> %b @@ -1543,7 +1543,7 @@ define <16 x i8> @f32to16uc(<16 x float> %f) { ; ; SKX-LABEL: f32to16uc: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vpmovdb %zmm0, %xmm0 # sched: [4:2.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1560,7 +1560,7 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; ; SKX-LABEL: f32to16us: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vpmovdw %zmm0, %ymm0 # sched: [4:2.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = fptoui <16 x float> %f to <16 x i16> @@ -1575,7 +1575,7 @@ define <8 x i32> @f32to8ui(<8 x float> %a) nounwind { ; ; SKX-LABEL: f32to8ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <8 x float> %a to <8 x i32> ret <8 x i32> %b @@ -1589,7 +1589,7 @@ define <4 x i32> @f32to4ui(<4 x float> %a) nounwind { ; ; SKX-LABEL: f32to4ui: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2udq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptoui <4 x float> %a to <4 x i32> ret <4 x i32> %b @@ -1684,7 +1684,7 @@ define <8 x double> @i32to8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwi ; SKX-LABEL: i32to8f64_mask: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] 
+; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: i32to8f64_mask: ; VLNOBW: # %bb.0: @@ -1706,7 +1706,7 @@ define <8 x double> @sito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; SKX-LABEL: sito8f64_maskz: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: sito8f64_maskz: ; VLNOBW: # %bb.0: @@ -2094,7 +2094,7 @@ define <8 x double> @slto8f64(<8 x i64> %a) { ; ; SKX-LABEL: slto8f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <8 x i64> %a to <8 x double> ret <8 x double> %b @@ -2109,8 +2109,8 @@ define <16 x double> @slto16f64(<16 x i64> %a) { ; ; SKX-LABEL: slto16f64: ; SKX: # %bb.0: -; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.33] +; SKX-NEXT: vcvtqq2pd %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtqq2pd %zmm1, %zmm1 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i64> %a to <16 x double> ret <16 x double> %b @@ -2158,7 +2158,7 @@ define <8 x double> @uito8f64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwin ; SKX-LABEL: uito8f64_mask: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; VLNOBW-LABEL: uito8f64_mask: ; VLNOBW: # %bb.0: @@ -2180,7 +2180,7 @@ define <8 x double> @uito8f64_maskz(<8 x i32> %a, i8 %b) nounwind { ; SKX-LABEL: uito8f64_maskz: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [7:1.00] +; SKX-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z} # sched: [4:0.50] ; 
SKX-NEXT: retq # sched: [7:1.00] %1 = bitcast i8 %b to <8 x i1> %2 = uitofp <8 x i32> %a to <8 x double> @@ -2210,7 +2210,7 @@ define <16 x float> @uito16f32(<16 x i32> %a) nounwind { ; ; SKX-LABEL: uito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -2238,7 +2238,7 @@ define <8 x float> @uito8f32(<8 x i32> %a) nounwind { ; ; SKX-LABEL: uito8f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <8 x i32> %a to <8 x float> ret <8 x float> %b @@ -2252,7 +2252,7 @@ define <4 x float> @uito4f32(<4 x i32> %a) nounwind { ; ; SKX-LABEL: uito4f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtudq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <4 x i32> %a to <4 x float> ret <4 x float> %b @@ -2266,7 +2266,7 @@ define i32 @fptosi(float %a) nounwind { ; ; SKX-LABEL: fptosi: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [7:1.00] +; SKX-NEXT: vcvttss2si %xmm0, %eax # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %b = fptosi float %a to i32 ret i32 %b @@ -2326,7 +2326,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = sitofp <16 x i1> %mask to <16 x float> @@ -2343,7 +2343,7 @@ define <16 x float> @scto16f32(<16 x i8> %a) { ; SKX-LABEL: scto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; 
SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i8> %a to <16 x float> ret <16 x float> %1 @@ -2359,7 +2359,7 @@ define <16 x float> @ssto16f32(<16 x i16> %a) { ; SKX-LABEL: ssto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %1 = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %1 @@ -2482,8 +2482,8 @@ define <8 x float> @sbto8f32(<8 x float> %a) { ; SKX-LABEL: sbto8f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <8 x float> %a, zeroinitializer %1 = sitofp <8 x i1> %cmpres to <8 x float> @@ -2501,8 +2501,8 @@ define <4 x float> @sbto4f32(<4 x float> %a) { ; SKX-LABEL: sbto4f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <4 x float> %a, zeroinitializer %1 = sitofp <4 x i1> %cmpres to <4 x float> @@ -2541,8 +2541,8 @@ define <2 x float> @sbto2f32(<2 x float> %a) { ; SKX-LABEL: sbto2f32: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = 
fcmp ogt <2 x float> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x float> @@ -2561,9 +2561,9 @@ define <2 x double> @sbto2f64(<2 x double> %a) { ; SKX-LABEL: sbto2f64: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %cmpres = fcmp ogt <2 x double> %a, zeroinitializer %1 = sitofp <2 x i1> %cmpres to <2 x double> @@ -2580,7 +2580,7 @@ define <16 x float> @ucto16f32(<16 x i8> %a) { ; SKX-LABEL: ucto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i8> %a to <16 x float> ret <16 x float>%b @@ -2614,7 +2614,7 @@ define <16 x float> @swto16f32(<16 x i16> %a) { ; SKX-LABEL: swto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2686,7 +2686,7 @@ define <16 x float> @uwto16f32(<16 x i16> %a) { ; SKX-LABEL: uwto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2736,7 +2736,7 @@ define <16 x float> @sito16f32(<16 x i32> %a) { ; ; SKX-LABEL: sito16f32: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = sitofp <16 x i32> %a to <16 x float> ret <16 x float> %b @@ -2772,7 +2772,7 @@ define <16 x float> @usto16f32(<16 x i16> %a) { ; SKX-LABEL: usto16f32: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [3:1.00] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b = uitofp <16 x i16> %a to <16 x float> ret <16 x float> %b @@ -2791,8 +2791,8 @@ define <16 x float> @ubto16f32(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50] -; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] +; SKX-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp slt <16 x i32> %a, zeroinitializer %1 = uitofp <16 x i1> %mask to <16 x float> @@ -2814,7 +2814,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) { ; SKX: # %bb.0: ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: 
[1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:0.50] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm1 # sched: [1:1.00] ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [7:1.00] ; SKX-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [3:1.00] ; SKX-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [7:1.00] @@ -2945,7 +2945,7 @@ define <2 x double> @ubto2f64(<2 x i32> %a) { ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] sched: [1:1.00] -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp ne <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x double> @@ -4253,7 +4253,7 @@ define <16 x i32> @zext_16i1_to_16xi32(i16 %b) { ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2d %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrld $31, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i16 %b to <16 x i1> %c = zext <16 x i1> %a to <16 x i32> @@ -4272,7 +4272,7 @@ define <8 x i64> @zext_8i1_to_8xi64(i8 %b) { ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k0 # sched: [1:1.00] ; SKX-NEXT: vpmovm2q %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlq $63, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %a = bitcast i8 %b to <8 x i1> %c = zext <8 x i1> %a to <8 x i64> @@ -4312,7 +4312,7 @@ define i16 @trunc_16i32_to_16i1(<16 x i32> %a) { ; ; SKX-LABEL: trunc_16i32_to_16i1: ; SKX: # %bb.0: -; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpslld $31, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovd2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed 
$eax @@ -4502,7 +4502,7 @@ define <64 x i16> @test21(<64 x i16> %x , <64 x i1> %mask) nounwind readnone { ; ; SKX-LABEL: test21: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %zmm2, %zmm2 # sched: [1:1.00] ; SKX-NEXT: vpmovb2m %zmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] ; SKX-NEXT: kshiftrq $32, %k1, %k1 # sched: [3:1.00] @@ -4666,7 +4666,7 @@ define <32 x i16> @zext_32xi1_to_32xi16(<32 x i16> %x, <32 x i16> %y) #0 { ; SKX: # %bb.0: ; SKX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 # sched: [3:1.00] ; SKX-NEXT: vpmovm2w %k0, %zmm0 # sched: [1:0.25] -; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsrlw $15, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %mask = icmp eq <32 x i16> %x, %y %1 = zext <32 x i1> %mask to <32 x i16> @@ -4763,8 +4763,8 @@ define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 ; ; SKX-LABEL: test_x86_fmadd_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fadd <16 x float> %x, %a2 @@ -4780,8 +4780,8 @@ define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 ; ; SKX-LABEL: test_x86_fmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %x, %a2 @@ -4797,8 +4797,8 @@ define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; ; SKX-LABEL: test_x86_fnmadd_ps_z: ; SKX: # %bb.0: -; 
SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubps %zmm0, %zmm2, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %res = fsub <16 x float> %a2, %x @@ -4815,9 +4815,9 @@ define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <1 ; ; SKX-LABEL: test_x86_fnmsub_ps_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vxorps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <16 x float> %a0, %a1 %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, @@ -4837,8 +4837,8 @@ define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 ; ; SKX-LABEL: test_x86_fmadd_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fadd <8 x double> %x, %a2 @@ -4854,8 +4854,8 @@ define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 ; ; SKX-LABEL: test_x86_fmsub_pd_z: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.33] -; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %zmm1, %zmm0, %zmm0 # sched: [4:0.50] +; SKX-NEXT: vsubpd %zmm2, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul <8 x double> %a0, %a1 %res = fsub <8 x double> %x, %a2 @@ -4871,8 +4871,8 @@ define double 
@test_x86_fmsub_213(double %a0, double %a1, double %a2) { ; ; SKX-LABEL: test_x86_fmsub_213: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vsubsd %xmm2, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %x = fmul double %a0, %a1 %res = fsub double %x, %a2 @@ -4888,7 +4888,7 @@ define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) { ; ; SKX-LABEL: test_x86_fmsub_213_m: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr @@ -4907,7 +4907,7 @@ define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) { ; SKX-LABEL: test_x86_fmsub_231_m: ; SKX: # %bb.0: ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load double , double *%a2_ptr %x = fmul double %a0, %a2 @@ -4925,7 +4925,7 @@ define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; SKX-LABEL: test231_br: ; SKX: # %bb.0: ; SKX-NEXT: vmulps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 
0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000> %b2 = fadd <16 x float> %b1, %a2 @@ -4941,7 +4941,7 @@ define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind { ; ; SKX-LABEL: test213_br: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm1, %zmm0, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b1 = fmul <16 x float> %a1, %a2 @@ -4964,7 +4964,7 @@ define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm2 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm2, %zmm0 {%k1} # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 %x = fmul <16 x float> %a0, %a2 @@ -4989,7 +4989,7 @@ define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] ; SKX-NEXT: vmulps (%rdi), %zmm0, %zmm0 # sched: [11:0.50] -; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.33] +; SKX-NEXT: vaddps %zmm1, %zmm0, %zmm1 {%k1} # sched: [4:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] %a2 = load <16 x float>,<16 x float> *%a2_ptrt,align 1 @@ -5014,7 +5014,7 @@ define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <1 ; SKX: # %bb.0: ; SKX-NEXT: vpsllw $7, %xmm2, %xmm2 # sched: [1:0.50] ; SKX-NEXT: vpmovb2m %xmm2, %k1 # sched: [1:1.00] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %zmm0, %zmm1 {%k1} # sched: [11:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm0 # 
sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -5035,7 +5035,7 @@ define <16 x i32> @vpandd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; SKX-LABEL: vpandd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5055,7 +5055,7 @@ define <16 x i32> @vpandnd(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readno ; SKX-LABEL: vpandnd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5077,7 +5077,7 @@ define <16 x i32> @vpord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnone ; SKX-LABEL: vpord: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5097,7 +5097,7 @@ define <16 x i32> @vpxord(<16 x i32> %a, <16 x i32> %b) nounwind uwtable readnon ; SKX-LABEL: vpxord: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. 
@@ -5117,7 +5117,7 @@ define <8 x i64> @vpandq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; SKX-LABEL: vpandq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5136,7 +5136,7 @@ define <8 x i64> @vpandnq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ; SKX-LABEL: vpandnq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpandnq %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5156,7 +5156,7 @@ define <8 x i64> @vporq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone ss ; SKX-LABEL: vporq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vporq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. @@ -5175,7 +5175,7 @@ define <8 x i64> @vpxorq(<8 x i64> %a, <8 x i64> %b) nounwind uwtable readnone s ; SKX-LABEL: vpxorq: ; SKX: # %bb.0: # %entry ; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0 # sched: [8:0.50] -; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vpxorq %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: ; Force the execution domain with an add. 
@@ -5192,7 +5192,7 @@ define <64 x i8> @and_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: and_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = and <64 x i8> %a, %b ret <64 x i8> %res @@ -5206,7 +5206,7 @@ define <64 x i8> @andn_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: andn_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b2 = xor <64 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, @@ -5224,7 +5224,7 @@ define <64 x i8> @or_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: or_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = or <64 x i8> %a, %b ret <64 x i8> %res @@ -5238,7 +5238,7 @@ define <64 x i8> @xor_v64i8(<64 x i8> %a, <64 x i8> %b) { ; ; SKX-LABEL: xor_v64i8: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = xor <64 x i8> %a, %b ret <64 x i8> %res @@ -5252,7 +5252,7 @@ define <32 x i16> @and_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: and_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = and <32 x i16> %a, %b ret <32 x i16> %res @@ -5266,7 +5266,7 @@ define <32 x i16> @andn_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: andn_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.33] +; 
SKX-NEXT: vandnps %zmm0, %zmm1, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %b2 = xor <32 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> @@ -5282,7 +5282,7 @@ define <32 x i16> @or_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: or_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = or <32 x i16> %a, %b ret <32 x i16> %res @@ -5296,7 +5296,7 @@ define <32 x i16> @xor_v32i16(<32 x i16> %a, <32 x i16> %b) { ; ; SKX-LABEL: xor_v32i16: ; SKX: # %bb.0: -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %res = xor <32 x i16> %a, %b ret <32 x i16> %res @@ -5313,8 +5313,8 @@ define <16 x float> @masked_and_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; SKX-LABEL: masked_and_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5338,8 +5338,8 @@ define <16 x float> @masked_or_v16f32(<16 x float> %a, <16 x float> %b, <16 x fl ; SKX-LABEL: masked_or_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, 
%zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5363,8 +5363,8 @@ define <16 x float> @masked_xor_v16f32(<16 x float> %a, <16 x float> %b, <16 x f ; SKX-LABEL: masked_xor_v16f32: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandps %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddps %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <16 x float> %a to <16 x i32> %b1 = bitcast <16 x float> %b to <16 x i32> @@ -5388,8 +5388,8 @@ define <8 x double> @masked_and_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; SKX-LABEL: masked_and_v8f64: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5413,8 +5413,8 @@ define <8 x double> @masked_or_v8f64(<8 x double> %a, <8 x double> %b, <8 x doub ; SKX-LABEL: masked_or_v8f64: ; SKX: # %bb.0: ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5438,8 +5438,8 @@ define <8 x double> @masked_xor_v8f64(<8 x double> %a, <8 x double> %b, <8 x dou ; SKX-LABEL: masked_xor_v8f64: ; SKX: # %bb.0: ; 
SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.33] -; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm0, %zmm2 {%k1} # sched: [1:0.50] +; SKX-NEXT: vaddpd %zmm2, %zmm3, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %a1 = bitcast <8 x double> %a to <8 x i64> %b1 = bitcast <8 x double> %b to <8 x i64> @@ -5462,7 +5462,7 @@ define <8 x i64> @test_mm512_mask_and_epi32(<8 x i64> %__src, i16 zeroext %__k, ; SKX-LABEL: test_mm512_mask_and_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %and1.i.i = and <8 x i64> %__a, %__b @@ -5484,7 +5484,7 @@ define <8 x i64> @test_mm512_mask_or_epi32(<8 x i64> %__src, i16 zeroext %__k, < ; SKX-LABEL: test_mm512_mask_or_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %or1.i.i = or <8 x i64> %__a, %__b @@ -5506,7 +5506,7 @@ define <8 x i64> @test_mm512_mask_xor_epi32(<8 x i64> %__src, i16 zeroext %__k, ; SKX-LABEL: test_mm512_mask_xor_epi32: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %xor1.i.i = xor <8 x i64> %__a, %__b @@ -5528,7 +5528,7 @@ define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, ; SKX-LABEL: test_mm512_mask_xor_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm0 {%k1} # sched: 
[1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5550,7 +5550,7 @@ define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_xor_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5572,7 +5572,7 @@ define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_xor_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5594,7 +5594,7 @@ define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A ; SKX-LABEL: test_mm512_maskz_xor_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5616,7 +5616,7 @@ define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, < ; SKX-LABEL: test_mm512_mask_or_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5638,7 +5638,7 @@ define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_or_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, 
%k1 # sched: [1:1.00] -; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5660,7 +5660,7 @@ define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_or_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5682,7 +5682,7 @@ define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, ; SKX-LABEL: test_mm512_maskz_or_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5704,7 +5704,7 @@ define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, ; SKX-LABEL: test_mm512_mask_and_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5726,7 +5726,7 @@ define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, ; SKX-LABEL: test_mm512_maskz_and_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5748,7 +5748,7 @@ define <16 x float> 
@test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, ; SKX-LABEL: test_mm512_mask_and_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm1, %zmm2, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5770,7 +5770,7 @@ define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A ; SKX-LABEL: test_mm512_maskz_and_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5792,7 +5792,7 @@ define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__ ; SKX-LABEL: test_mm512_mask_andnot_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5815,7 +5815,7 @@ define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %_ ; SKX-LABEL: test_mm512_maskz_andnot_pd: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <8 x double> %__A to <8 x i64> @@ -5838,7 +5838,7 @@ define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %_ ; SKX-LABEL: test_mm512_mask_andnot_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm0 {%k1} # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm2, 
%zmm1, %zmm0 {%k1} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -5861,7 +5861,7 @@ define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> % ; SKX-LABEL: test_mm512_maskz_andnot_ps: ; SKX: # %bb.0: # %entry ; SKX-NEXT: kmovd %edi, %k1 # sched: [1:1.00] -; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.33] +; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 {%k1} {z} # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] entry: %0 = bitcast <16 x float> %__A to <16 x i32> @@ -7927,7 +7927,7 @@ define void @store_32i1_1(<32 x i1>* %a, <32 x i16> %v) { ; ; SKX-LABEL: store_32i1_1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $15, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovw2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovd %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] @@ -7950,7 +7950,7 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; SKX-LABEL: store_64i1: ; SKX: # %bb.0: -; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:0.50] +; SKX-NEXT: vpsllw $7, %zmm0, %zmm0 # sched: [1:1.00] ; SKX-NEXT: vpmovb2m %zmm0, %k0 # sched: [1:1.00] ; SKX-NEXT: kmovq %k0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: vzeroupper # sched: [4:1.00] @@ -8709,7 +8709,7 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; SKX: # %bb.0: ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; SKX-NEXT: callq func_f32 ; SKX-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] @@ -8741,7 +8741,7 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; SKX: # %bb.0: ; SKX-NEXT: subq $24, %rsp # sched: [1:0.25] ; SKX-NEXT: .cfi_def_cfa_offset 32 -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; 
SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; SKX-NEXT: callq func_f64 ; SKX-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [8:0.50] diff --git a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll index dc75d05825c..53df4bfaba1 100644 --- a/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512vpopcntdq-schedule.ll @@ -25,15 +25,15 @@ define void @test_vpopcntd(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> *%a2, i16 ; ICELAKE: # %bb.0: ; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:0.50] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntd (%rdi){1to16}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP ; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] ; ICELAKE-NEXT: retq # sched: [7:1.00] @@ -63,15 +63,15 @@ define void 
@test_vpopcntq(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> *%a2, i8 %a3) ; ICELAKE: # %bb.0: ; ICELAKE-NEXT: kmovd %esi, %k1 # sched: [1:1.00] ; ICELAKE-NEXT: #APP -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:0.50] -; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:0.50] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq %zmm1, %zmm0 {%k1} {z} # sched: [1:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi), %zmm0 {%k1} {z} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} # sched: [8:1.00] +; ICELAKE-NEXT: vpopcntq (%rdi){1to8}, %zmm0 {%k1} {z} # sched: [8:1.00] ; ICELAKE-NEXT: #NO_APP ; ICELAKE-NEXT: vzeroupper # sched: [4:1.00] ; ICELAKE-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/fma-schedule.ll b/llvm/test/CodeGen/X86/fma-schedule.ll index f69c62a8680..819b9c7f27d 100644 --- a/llvm/test/CodeGen/X86/fma-schedule.ll +++ b/llvm/test/CodeGen/X86/fma-schedule.ll @@ -75,9 +75,9 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231pd 
{{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] @@ -167,9 +167,9 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfmaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] @@ -257,9 +257,9 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 
sched: [4:0.50] ; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50] @@ -349,9 +349,9 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; SKX-LABEL: test_vfmaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [11:0.50] @@ -439,9 +439,9 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmaddsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] @@ -527,9 +527,9 @@ define 
void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmaddss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [9:0.50] @@ -619,9 +619,9 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-LABEL: test_vfmaddsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] ; SKX-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; SKX-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] @@ -711,9 +711,9 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_vfmaddsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33] 
-; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] ; SKX-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] @@ -801,9 +801,9 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-LABEL: test_vfmaddsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.33] +; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50] ; SKX-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50] ; SKX-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50] @@ -893,9 +893,9 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX-LABEL: test_vfmaddsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.33] -; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.33] -; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.33] +; 
SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [4:0.50] +; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [4:0.50] +; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [4:0.50] ; SKX-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [11:0.50] ; SKX-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [11:0.50] ; SKX-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [11:0.50] @@ -987,9 +987,9 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-LABEL: test_vfmsubaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; SKX-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] @@ -1079,9 +1079,9 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl ; SKX-LABEL: test_vfmsubaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] +; 
SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] @@ -1169,9 +1169,9 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-LABEL: test_vfmsubaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50] ; SKX-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50] @@ -1261,9 +1261,9 @@ define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SKX-LABEL: test_vfmsubaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [11:0.50] ; 
SKX-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [11:0.50] ; SKX-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [11:0.50] @@ -1355,9 +1355,9 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] @@ -1447,9 +1447,9 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfmsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] @@ -1537,9 +1537,9 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; 
SKX-LABEL: test_vfmsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50] @@ -1629,9 +1629,9 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2 ; SKX-LABEL: test_vfmsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [11:0.50] @@ -1719,9 +1719,9 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfmsubsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 
* xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] @@ -1807,9 +1807,9 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2 ; SKX-LABEL: test_vfmsubss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [9:0.50] @@ -1899,9 +1899,9 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmaddpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: 
[4:0.50] ; SKX-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50] @@ -1991,9 +1991,9 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfnmaddpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] @@ -2081,9 +2081,9 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmaddps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] ; SKX-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + 
xmm0 sched: [10:0.50] @@ -2173,9 +2173,9 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; SKX-LABEL: test_vfnmaddps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] ; SKX-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [11:0.50] @@ -2263,9 +2263,9 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmaddsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] @@ -2351,9 +2351,9 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmaddss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 
= -(xmm0 * xmm2) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] ; SKX-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [9:0.50] @@ -2443,9 +2443,9 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmsubpd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] @@ -2535,9 +2535,9 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SKX-LABEL: test_vfnmsubpd_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132pd 
{{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] @@ -2625,9 +2625,9 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmsubps_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50] ; SKX-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50] ; SKX-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50] @@ -2717,9 +2717,9 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a ; SKX-LABEL: test_vfnmsubps_256: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [4:0.50] ; SKX-NEXT: 
vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [11:0.50] ; SKX-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [11:0.50] ; SKX-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [11:0.50] @@ -2807,9 +2807,9 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-LABEL: test_vfnmsubsd_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] @@ -2895,9 +2895,9 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a ; SKX-LABEL: test_vfnmsubss_128: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.33] -; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.33] +; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [4:0.50] +; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [4:0.50] ; SKX-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [9:0.50] ; SKX-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [9:0.50] ; SKX-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [9:0.50] diff 
--git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 9a60934cba5..ac87a55f2a7 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -152,9 +152,9 @@ define <2 x double> @test_cvtpi2pd(x86_mmx %a0, x86_mmx* %a1) optsize { ; ; SKX-LABEL: test_cvtpi2pd: ; SKX: # %bb.0: -; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: cvtpi2pd %mm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: cvtpi2pd (%rdi), %xmm1 # sched: [9:0.50] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpi2pd: @@ -232,7 +232,7 @@ define <4 x float> @test_cvtpi2ps(x86_mmx %a0, x86_mmx* %a1, <4 x float> %a2, <4 ; SKX: # %bb.0: ; SKX-NEXT: cvtpi2ps %mm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: cvtpi2ps (%rdi), %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-LABEL: test_cvtpi2ps: diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index 29254f5813f..06447ab9d63 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -153,7 +153,7 @@ define float @f32_one_step(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div @@ -271,10 +271,10 @@ define float @f32_two_step(float %x) #2 { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm2 = 
mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 1.0, %x ret float %div @@ -418,7 +418,7 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <4 x float> %div @@ -536,10 +536,10 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = 
(xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <4 x float> %div @@ -693,7 +693,7 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <8 x float> %div @@ -824,10 +824,10 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <8 x float> %div @@ -1031,9 +1031,9 @@ define <16 x float> 
@v16f32_one_step(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <16 x float> %div @@ -1235,13 +1235,13 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 { ; ; SKX-LABEL: v16f32_two_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <16 x float> %div diff --git 
a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index aaaf0c6ab7e..7cc09dc96f2 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -154,7 +154,7 @@ define float @f32_one_step_2(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x @@ -254,9 +254,9 @@ define float @f32_one_step_2_divs(float %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [9:0.50] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] -; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x @@ -383,10 +383,10 @@ define float @f32_two_step_2(float %x) #2 { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm3 = -(xmm0 * xmm3) 
+ xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast float 6789.0, %x @@ -480,7 +480,7 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x @@ -582,9 +582,9 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [10:0.50] -; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x %div2 = fdiv fast <4 x float> %div, %x @@ -711,10 +711,10 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; SKX-NEXT: vrcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.33] -; 
SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm3 = -(xmm0 * xmm3) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm3 = (xmm3 * xmm1) + xmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm3 * xmm0) + xmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, %x @@ -816,7 +816,7 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x @@ -927,9 +927,9 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 { ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 
4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x %div2 = fdiv fast <8 x float> %div, %x @@ -1070,10 +1070,10 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SKX-NEXT: vrcpps %ymm0, %ymm1 # sched: [4:1.00] ; SKX-NEXT: vbroadcastss {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm3 = -(ymm0 * ymm3) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm3 = (ymm3 * ymm1) + ymm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm3 * ymm0) + ymm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <8 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>, %x @@ -1331,9 +1331,9 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x @@ 
-1498,11 +1498,11 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 { ; ; SKX-LABEL: v16f32_one_step_2_divs: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + mem sched: [11:0.50] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.33] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm1) + zmm1 sched: [4:0.50] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm1 # sched: [11:0.50] -; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %zmm0, %zmm1, %zmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x %div2 = fdiv fast <16 x float> %div, %x @@ -1721,13 +1721,13 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 { ; ; SKX-LABEL: v16f32_two_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm1 # sched: [4:2.00] ; SKX-NEXT: vbroadcastss {{.*#+}} zmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] sched: [8:0.50] ; SKX-NEXT: vmovaps %zmm1, %zmm3 # sched: [1:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.33] -; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.33] -; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.33] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm3 = -(zmm0 * zmm3) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm3 = (zmm3 * zmm1) + zmm1 sched: [4:0.50] +; SKX-NEXT: vfnmadd213ps {{.*#+}} zmm0 = -(zmm3 * zmm0) + zmm2 sched: [4:0.50] +; SKX-NEXT: vfmadd132ps {{.*#+}} zmm0 = (zmm0 * zmm3) + zmm3 sched: [4:0.50] ; SKX-NEXT: vmulps 
{{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x @@ -1786,7 +1786,7 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 { ; ; SKX-LABEL: v16f32_no_step: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x ret <16 x float> %div @@ -1861,7 +1861,7 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 { ; ; SKX-LABEL: v16f32_no_step2: ; SKX: # %bb.0: -; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [9:2.00] +; SKX-NEXT: vrcp14ps %zmm0, %zmm0 # sched: [4:2.00] ; SKX-NEXT: vmulps {{.*}}(%rip), %zmm0, %zmm0 # sched: [11:0.50] ; SKX-NEXT: retq # sched: [7:1.00] %div = fdiv fast <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, %x diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 84b2f91424e..3eb4cb15478 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -8376,13 +8376,13 @@ define void @test_nop(i16 %a0, i32 %a1, i64 %a2, i16 *%p0, i32 *%p1, i64 *%p2) o ; SKX-LABEL: test_nop: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.25] -; SKX-NEXT: nopw %di # sched: [1:0.25] -; SKX-NEXT: nopw (%rcx) # sched: [1:0.25] -; SKX-NEXT: nopl %esi # sched: [1:0.25] -; SKX-NEXT: nopl (%r8) # sched: [1:0.25] -; SKX-NEXT: nopq %rdx # sched: [1:0.25] -; SKX-NEXT: 
nopq (%r9) # sched: [1:0.25] +; SKX-NEXT: nop # sched: [1:0.17] +; SKX-NEXT: nopw %di # sched: [1:0.17] +; SKX-NEXT: nopw (%rcx) # sched: [1:0.17] +; SKX-NEXT: nopl %esi # sched: [1:0.17] +; SKX-NEXT: nopl (%r8) # sched: [1:0.17] +; SKX-NEXT: nopq %rdx # sched: [1:0.17] +; SKX-NEXT: nopq (%r9) # sched: [1:0.17] ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/CodeGen/X86/sha-schedule.ll b/llvm/test/CodeGen/X86/sha-schedule.ll index dedf6f98b8a..7069bd3a119 100644 --- a/llvm/test/CodeGen/X86/sha-schedule.ll +++ b/llvm/test/CodeGen/X86/sha-schedule.ll @@ -23,7 +23,7 @@ define <4 x i32> @test_sha1msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1msg1 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1msg1 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -54,7 +54,7 @@ define <4 x i32> @test_sha1msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1msg2 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1msg2 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -85,7 +85,7 @@ define <4 x i32> @test_sha1nexte(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1nexte: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1nexte %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1nexte (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -116,7 +116,7 @@ define <4 x i32> @test_sha1rnds4(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; CANNONLAKE-LABEL: test_sha1rnds4: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha1rnds4 
$3, %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -151,7 +151,7 @@ define <4 x i32> @test_sha256msg1(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; ; CANNONLAKE-LABEL: test_sha256msg1: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256msg1 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256msg1 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -182,7 +182,7 @@ define <4 x i32> @test_sha256msg2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) ; ; CANNONLAKE-LABEL: test_sha256msg2: ; CANNONLAKE: # %bb.0: -; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256msg2 %xmm1, %xmm0 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256msg2 (%rdi), %xmm0 # sched: [10:0.50] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] ; @@ -221,7 +221,7 @@ define <4 x i32> @test_sha256rnds2(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, ; CANNONLAKE: # %bb.0: ; CANNONLAKE-NEXT: vmovaps %xmm0, %xmm3 # sched: [1:0.33] ; CANNONLAKE-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:0.33] -; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.33] +; CANNONLAKE-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # sched: [4:0.50] ; CANNONLAKE-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm3 # sched: [10:0.50] ; CANNONLAKE-NEXT: vmovaps %xmm3, %xmm0 # sched: [1:0.33] ; CANNONLAKE-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index bd1e44f6561..f18d0934198 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -90,13 +90,13 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_addps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addps (%rdi), %xmm0 # 
sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addps: ; SKX: # %bb.0: -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -198,13 +198,13 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_addss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addss: ; SKX: # %bb.0: -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -551,14 +551,14 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_cmpps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -669,13 +669,13 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_cmpss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpss: ; 
SKX: # %bb.0: -; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1041,14 +1041,14 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ss: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ss: @@ -1167,14 +1167,14 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] ; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2ssq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2ssq: @@ -1420,14 +1420,14 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; SKX-SSE-LABEL: test_cvtss2siq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [7:1.00] ; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] ; SKX-SSE-NEXT: addq %rcx, %rax # sched: 
[1:0.25] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2siq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [7:1.00] ; SKX-NEXT: vcvtss2si (%rdi), %rax # sched: [11:1.00] ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] @@ -1549,14 +1549,14 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; SKX-SSE-LABEL: test_cvttss2si: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [6:1.00] ; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] ; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttss2si: ; SKX: # %bb.0: -; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [6:1.00] ; SKX-NEXT: vcvttss2si (%rdi), %eax # sched: [11:1.00] ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] @@ -2116,13 +2116,13 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_maxps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxps: ; SKX: # %bb.0: -; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2225,13 +2225,13 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_maxss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; 
SKX-LABEL: test_maxss: ; SKX: # %bb.0: -; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2334,13 +2334,13 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_minps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minps: ; SKX: # %bb.0: -; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2443,13 +2443,13 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_minss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minss: ; SKX: # %bb.0: -; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2564,14 +2564,14 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-SSE-LABEL: test_movaps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movaps: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, 
%xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2795,7 +2795,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movhps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] @@ -2803,7 +2803,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-LABEL: test_movhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2918,13 +2918,13 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SKX-SSE-LABEL: test_movlhps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movlhps: @@ -3036,14 +3036,14 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movlps: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: 
[4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3352,14 +3352,14 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SKX-SSE-LABEL: test_movss_mem: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movss_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3572,14 +3572,14 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-SSE-LABEL: test_movups: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movups: ; SKX: # %bb.0: ; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3685,13 +3685,13 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x 
float> *%a ; ; SKX-SSE-LABEL: test_mulps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulps: ; SKX: # %bb.0: -; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3793,13 +3793,13 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_mulss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulss: ; SKX: # %bb.0: -; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4206,14 +4206,14 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpps: ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vrcpps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rcpps: @@ -4347,7 +4347,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 
sched: [5:0.50] ; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rcpss: @@ -4355,7 +4355,7 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rcpss: @@ -4483,14 +4483,14 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] ; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [10:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtps: @@ -4624,7 +4624,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_rsqrtss: @@ -4632,7 +4632,7 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: 
vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_rsqrtss: @@ -4854,14 +4854,14 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] ; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufps: ; SKX: # %bb.0: ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] ; SKX-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_shufps: @@ -4981,14 +4981,14 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:3.00] ; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:3.00] ; SKX-NEXT: vsqrtps (%rdi), %xmm1 # sched: [18:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtps: @@ -5122,7 +5122,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:3.00] ; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: 
[6:0.50] ; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:3.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtss: @@ -5130,7 +5130,7 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:3.00] ; SKX-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [12:3.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtss: @@ -5351,13 +5351,13 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SKX-SSE-LABEL: test_subps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subps: ; SKX: # %bb.0: -; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5459,13 +5459,13 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; ; SKX-SSE-LABEL: test_subss: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subss: ; SKX: # %bb.0: -; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5826,14 +5826,14 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; 
SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhps: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhps: @@ -5952,14 +5952,14 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklps: ; SKX: # %bb.0: ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklps: @@ -6210,7 +6210,7 @@ define <4 x float> @test_fnop() nounwind { ; SKX-SSE-LABEL: test_fnop: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: #APP -; SKX-SSE-NEXT: nop # sched: [1:0.25] +; SKX-SSE-NEXT: nop # sched: [1:0.17] ; SKX-SSE-NEXT: #NO_APP ; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] @@ -6218,7 +6218,7 @@ define <4 x float> @test_fnop() nounwind { ; 
SKX-LABEL: test_fnop: ; SKX: # %bb.0: ; SKX-NEXT: #APP -; SKX-NEXT: nop # sched: [1:0.25] +; SKX-NEXT: nop # sched: [1:0.17] ; SKX-NEXT: #NO_APP ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 55a5fe6c8ed..0af49688c52 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -88,13 +88,13 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_addpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -196,13 +196,13 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_addsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -317,14 +317,14 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; 
SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andpd: ; SKX: # %bb.0: ; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andpd: @@ -447,14 +447,14 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_andnotpd: @@ -673,14 +673,14 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_cmppd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] ; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -790,13 +790,13 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_cmpsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: 
cmpeqsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cmpsd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1162,16 +1162,16 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SKX-SSE-LABEL: test_cvtdq2pd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2pd: @@ -1291,16 +1291,16 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SKX-SSE-LABEL: test_cvtdq2ps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: 
vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtdq2ps: @@ -1427,7 +1427,7 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1550,14 +1550,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtpd2ps: @@ -1676,14 +1676,14 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_cvtps2dq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: 
[7:1.00] @@ -1806,14 +1806,14 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtps2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtps2pd: @@ -2205,7 +2205,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] ; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsd2ss: @@ -2213,7 +2213,7 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] ; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsd2ss: @@ -2336,14 +2336,14 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sd: ; SKX: # 
%bb.0: ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sd: @@ -2462,14 +2462,14 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtsi2sdq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtsi2sdq: @@ -2603,7 +2603,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] ; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvtss2sd: @@ -2611,7 +2611,7 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] ; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] ; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_cvtss2sd: @@ -2742,7 +2742,7 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> 
%a0, <2 x double> *%a1) { ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] +; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -2863,14 +2863,14 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_cvttps2dq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] @@ -3732,13 +3732,13 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_maxpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3841,13 +3841,13 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_maxsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] ; 
SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_maxsd: ; SKX: # %bb.0: -; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -3950,13 +3950,13 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_minpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: -; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4059,13 +4059,13 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_minsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_minsd: ; SKX: # %bb.0: -; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4180,14 +4180,14 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-SSE-LABEL: test_movapd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: 
[6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4917,14 +4917,14 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movhpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movhpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5045,14 +5045,14 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-SSE-LABEL: test_movlpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movlpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5362,13 +5362,13 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SKX-SSE-LABEL: test_movntpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] ; 
SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5717,14 +5717,14 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKX-SSE-LABEL: test_movsd_mem: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsd_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -5945,14 +5945,14 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-SSE-LABEL: test_movupd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6058,13 +6058,13 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_mulpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 
# sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: -; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6166,13 +6166,13 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_mulsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_mulsd: ; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -6287,14 +6287,14 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_orpd: ; SKX: # %bb.0: ; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_orpd: @@ -9176,13 +9176,13 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmaddwd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; 
SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -9830,13 +9830,13 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhuw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -9939,13 +9939,13 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -10048,13 +10048,13 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmullw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: -; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmullw %xmm1, 
%xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -10156,13 +10156,13 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmuludq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14094,14 +14094,14 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] ; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_shufpd: ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] ; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_shufpd: @@ -14221,14 +14221,14 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] ; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtpd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: 
[18:6.00] ; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtpd: @@ -14362,7 +14362,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] ; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] ; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_sqrtsd: @@ -14370,7 +14370,7 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] ; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] ; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_sqrtsd: @@ -14481,13 +14481,13 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SKX-SSE-LABEL: test_subpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: -; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14589,13 +14589,13 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; ; SKX-SSE-LABEL: test_subsd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: 
[4:0.50] ; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_subsd: ; SKX: # %bb.0: -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -14956,14 +14956,14 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpckhpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] ; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpckhpd: @@ -15097,7 +15097,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] ; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] ; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; @@ -15105,7 +15105,7 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm1, 
%xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklpd: @@ -15228,14 +15228,14 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] ; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_xorpd: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_xorpd: diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index ae656e5cd1c..d8b0c45e866 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -88,13 +88,13 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SKX-SSE-LABEL: test_addsubpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubpd: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -197,13 +197,13 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SKX-SSE-LABEL: test_addsubps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: 
[10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_addsubps: ; SKX: # %bb.0: -; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -972,14 +972,14 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] ; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movddup: ; SKX: # %bb.0: ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] ; SKX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [5:0.50] -; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movddup: @@ -1099,14 +1099,14 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] ; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movshdup: ; SKX: # %bb.0: ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] ; SKX-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movshdup: @@ -1226,14 +1226,14 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = 
xmm0[0,0,2,2] sched: [1:1.00] ; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_movsldup: ; SKX: # %bb.0: ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] ; SKX-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_movsldup: diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 9cb7ecac832..f38dd4aafa5 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -92,14 +92,14 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-SSE-LABEL: test_blendpd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -212,14 +212,14 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] ; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # 
sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_blendps: ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] ; SKX-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_blendps: @@ -2065,14 +2065,14 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; ; SKX-SSE-LABEL: test_phminposuw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_phminposuw: ; SKX: # %bb.0: -; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] +; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_phminposuw: @@ -4767,13 +4767,13 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmuldq: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: -; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -4871,14 +4871,14 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SKX-SSE-LABEL: test_pmulld: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: 
pmulld %xmm1, %xmm0 # sched: [10:0.67] -; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67] +; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: -; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67] -; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67] +; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] +; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_pmulld: @@ -5153,16 +5153,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SKX-SSE-LABEL: test_roundpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: -; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:0.67] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [14:1.00] +; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundpd: @@ -5275,16 +5275,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; ; SKX-SSE-LABEL: test_roundps: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] +; 
SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: -; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] -; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:0.67] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] +; SKX-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [14:1.00] +; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundps: @@ -5402,16 +5402,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-SSE-LABEL: test_roundsd: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67] -; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundsd: ; SKX: # %bb.0: -; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundsd: @@ -5531,16 +5531,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-SSE-LABEL: test_roundss: ; SKX-SSE: # %bb.0: ; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67] -; SKX-SSE-NEXT: 
roundss $7, (%rdi), %xmm0 # sched: [14:0.67] -; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_roundss: ; SKX: # %bb.0: -; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] -; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:0.67] -; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] +; SKX-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] +; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_roundss: diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index a0a0355fe34..b10b1bb5c74 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -1249,13 +1249,13 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SKX-SSE-LABEL: test_pmaddubsw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmaddubsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; @@ -1359,13 +1359,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SKX-SSE-LABEL: test_pmulhrsw: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: 
[10:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_pmulhrsw: ; SKX: # %bb.0: -; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] +; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index 3eb5c356178..fc28ed4288f 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1018,25 +1018,25 @@ vzeroupper # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vaddsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vaddss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddsubpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddsubpd (%rax), %ymm1, 
%ymm2 -# CHECK-NEXT: 1 4 0.33 vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vaddsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vaddsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vaddsubps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 4 1.00 vaesdec %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vaesdec (%rax), %xmm1, %xmm2 @@ -1086,41 +1086,41 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: 1 6 0.50 * vbroadcastss (%rax), %xmm2 # CHECK-NEXT: 1 7 0.50 * vbroadcastss (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmppd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmppd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcmppd $0, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcmppd $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmpps $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpps $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcmpps $0, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcmpps $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vcmpsd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpsd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcmpsd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vcmpss $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcmpss $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcmpss $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vcomisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * vcomisd (%rax), %xmm1 # CHECK-NEXT: 1 2 1.00 vcomiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm1 -# CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * vcvtdq2pd (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtdq2pd 
%xmm0, %ymm2 # CHECK-NEXT: 3 13 1.00 * vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 2 8 1.00 * vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 3 8 1.00 * vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtps2dq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 4 0.50 vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: 2 5 1.00 vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * vcvtps2pd (%rax), %xmm2 @@ -1143,22 +1143,22 @@ vzeroupper # CHECK-NEXT: 2 5 1.00 vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvtss2sd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 vcvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 vcvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: 2 7 1.00 vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: 2 8 1.00 * vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 2 8 0.50 * vcvttpd2dqy (%rax), 
%xmm2 +# CHECK-NEXT: 1 4 0.50 vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * vcvttps2dq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: 1 3 0.50 vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: 2 11 0.50 * vcvttps2dq (%rax), %ymm2 # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 6 1.00 vcvttsd2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttsd2si (%rax), %rcx -# CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 vcvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 vcvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * vcvttss2si (%rax), %rcx @@ -1216,29 +1216,29 @@ vzeroupper # CHECK-NEXT: 2 8 0.50 * vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: 2 2 1.00 * * vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: 2 2 1.00 * * vmaskmovps %ymm0, %ymm1, (%rax) -# CHECK-NEXT: 1 4 0.33 vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmaxpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmaxpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmaxps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmaxps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmaxsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmaxss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmaxss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 
1 4 0.33 vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vminpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vminpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vminps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vminps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vminsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vminss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vmovapd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vmovapd %xmm0, (%rax) @@ -1327,17 +1327,17 @@ vzeroupper # CHECK-NEXT: 1 7 0.50 * vmovups (%rax), %ymm2 # CHECK-NEXT: 2 4 2.00 vmpsadbw $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 10 2.00 * vmpsadbw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmulpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmulpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vmulps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vmulps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmulsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vmulss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 
1 1 0.33 vorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vorpd (%rax), %xmm1, %xmm2 @@ -1441,8 +1441,8 @@ vzeroupper # CHECK-NEXT: 4 9 2.00 * vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 3 2.00 vphaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 9 2.00 * vphaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 * vphminposuw (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: 2 10 1.00 * vphminposuw (%rax), %xmm2 # CHECK-NEXT: 3 3 2.00 vphsubd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 9 2.00 * vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 3 2.00 vphsubsw %xmm0, %xmm1, %xmm2 @@ -1457,9 +1457,9 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 * vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 2 2.00 vpinsrw $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: 2 6 1.00 * vpinsrw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmaddubsw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmaddubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmaddwd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.50 vpmaxsb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vpmaxsb (%rax), %xmm1, %xmm2 @@ -1510,19 +1510,19 @@ vzeroupper # CHECK-NEXT: 2 6 1.00 * vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * vpmovzxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmuldq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmuldq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmulhrsw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhrsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmulhuw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhuw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmulhw %xmm0, %xmm1, %xmm2 +# 
CHECK-NEXT: 1 4 0.50 vpmulhw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmulhw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 10 0.67 vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 16 0.67 * vpmulld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 10 1.00 vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 16 1.00 * vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmullw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmullw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vpmuludq %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 1 0.33 vpor %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 7 0.50 * vpor (%rax), %xmm1, %xmm2 @@ -1612,18 +1612,18 @@ vzeroupper # CHECK-NEXT: 2 11 1.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 4 1.00 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: 3 15 0.67 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 2 8 0.67 vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: 3 15 0.67 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 2 8 0.67 vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 2 8 0.67 vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 3 14 0.67 * vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 3 15 1.00 * vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: 2 8 1.00 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 3 15 1.00 * vroundps $1, (%rax), %ymm2 +# 
CHECK-NEXT: 2 8 1.00 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 3 14 1.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 4 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 vrsqrtps %ymm0, %ymm2 @@ -1651,17 +1651,17 @@ vzeroupper # CHECK-NEXT: 1 12 3.00 vsqrtss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 17 3.00 * vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 2 1.00 * * * vstmxcsr (%rax) -# CHECK-NEXT: 1 4 0.33 vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vsubpd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vsubps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vsubps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vsubsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vsubss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vtestpd %xmm0, %xmm1 # CHECK-NEXT: 2 8 1.00 * vtestpd (%rax), %xmm1 @@ -1716,30 +1716,30 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 123.00 271.00 170.00 171.17 171.17 34.00 376.00 5.00 12.67 +# CHECK-NEXT: - 123.00 290.83 198.83 171.17 171.17 34.00 327.33 5.00 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 
0.33 0.33 - - - 0.33 - - vaddpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vaddsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vaddsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %ymm0, %ymm1, %ymm2 +# 
CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddsubps (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vaesdec %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vaesdec (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vaesdeclast %xmm0, %xmm1, %xmm2 @@ -1788,50 +1788,50 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - - - - vbroadcastss (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmppd $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmppd $0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpps $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpps 
$0, (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpsd $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpsd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcmpss $0, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcmpss $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmppd $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpps $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpsd $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpsd $0, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcmpss $0, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcmpss $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vcomisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomisd (%rax), %xmm1 # CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm1 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtdq2pd %xmm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - vcvtdq2pd (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - 
vcvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtdq2ps %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtdq2ps %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2ps (%rax), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtpd2dqy (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtpd2ps %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtpd2psy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 1.00 - - vcvtpd2psy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvtps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvtps2dq %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2dq (%rax), %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2pd %xmm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtps2pd (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - 
vcvtsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsd2ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - vcvtsd2ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtsi2sdl %ecx, %xmm0, %xmm2 @@ -1844,26 +1844,26 @@ vzeroupper # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvtsi2ssq (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtss2sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtss2sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvtss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvtss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvtss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqx (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvttpd2dq %ymm0, %xmm2 -# CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vcvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvttpd2dqy (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - 
vcvttps2dq %ymm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vcvttps2dq %ymm0, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvttps2dq (%rax), %ymm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - vcvttsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttsd2si (%rax), %rcx -# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vcvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - vcvttss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - vcvttss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vcvttss2si (%rax), %rcx # CHECK-NEXT: - 3.00 1.00 - - - - - - - vdivpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - vdivpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - 5.00 1.00 - - - - - - - vdivpd %ymm0, %ymm1, %ymm2 @@ -1918,30 +1918,30 @@ vzeroupper # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaskmovps (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %xmm0, %xmm1, (%rax) # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vmaskmovps %ymm0, %ymm1, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 
0.33 0.33 - - - 0.33 - - vmaxps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmaxss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmaxss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vminss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vminss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmaxss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmaxss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vminss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vminss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmovapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vmovapd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovapd (%rax), %xmm2 @@ -2029,18 +2029,18 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - - - - vmovups (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 2.00 - - vmpsadbw $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vmpsadbw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %xmm0, %xmm1, %xmm2 -# 
CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vmulss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vmulss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulsd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vmulss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vmulss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorpd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorpd %ymm0, %ymm1, %ymm2 @@ -2143,8 +2143,8 @@ vzeroupper # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphaddsw (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphaddw %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphaddw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vphminposuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vphminposuw (%rax), %xmm2 
+# CHECK-NEXT: - - 1.00 - - - - - - - vphminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vphminposuw (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 2.00 - - vphsubsw %xmm0, %xmm1, %xmm2 @@ -2159,10 +2159,10 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrq $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 2.00 - - vpinsrw $1, %eax, %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpinsrw $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddwd (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsb %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaxsb (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaxsd %xmm0, %xmm1, %xmm2 @@ -2212,20 +2212,20 @@ vzeroupper # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 
0.33 - - vpmulhuw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhuw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %xmm0, %xmm1, %xmm2 @@ -2314,18 +2314,18 @@ vzeroupper # 
CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundpd $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundps $1, %ymm0, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundsd $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vroundss $1, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vroundss $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 
1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %ymm0, %ymm2 @@ -2353,18 +2353,18 @@ vzeroupper # CHECK-NEXT: - 3.00 1.00 - - - - - - - vsqrtss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vsqrtss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 vstmxcsr (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubpd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubsd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubsd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vsubss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vsubss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubsd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubsd 
(%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vsubss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vsubss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vtestpd (%rax), %xmm1 # CHECK-NEXT: - - 1.00 - - - - - - - vtestpd %ymm0, %ymm1 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s index 5c626f26e65..a50febf70bd 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx2.s @@ -583,9 +583,9 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: 3 3 2.00 vphsubw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 4 10 2.00 * vphsubw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmaddubsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmaddubsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmaddwd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmaddwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 7 0.50 * vpmaskmovd (%rax), %xmm0, %xmm2 # CHECK-NEXT: 2 8 0.50 * vpmaskmovd (%rax), %ymm0, %ymm2 @@ -644,19 +644,19 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: 2 9 1.00 * vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: 1 3 1.00 vpmovzxwq %xmm0, %ymm2 # CHECK-NEXT: 2 10 1.00 * vpmovzxwq (%rax), %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmuldq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmuldq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhrsw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhrsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhuw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhuw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 
4 0.33 vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmulhw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmulhw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 2 10 0.67 vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 3 17 0.67 * vpmulld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 10 1.00 vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 3 17 1.00 * vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmullw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmullw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vpmuludq %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 1 0.33 vpor %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 0.50 * vpor (%rax), %ymm1, %ymm2 @@ -771,7 +771,7 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 107.00 86.00 85.17 85.17 1.00 169.00 - 1.67 +# CHECK-NEXT: - - 110.33 89.33 85.17 85.17 1.00 162.33 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -901,10 +901,10 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - 2.00 - - vphsubsw (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - vphsubw %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - vphsubw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddubsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddubsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmaddwd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaddwd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddubsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmaddubsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmaddwd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - vpmaddwd (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmaskmovd (%rax), %ymm0, %ymm2 # CHECK-NEXT: - - 1.00 - 0.33 0.33 - - - 0.33 vpmaskmovd %xmm0, %xmm1, (%rax) @@ -962,20 +962,20 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwd (%rax), %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpmovzxwq %xmm0, %ymm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpmovzxwq (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuldq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuldq (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhrsw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhrsw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhuw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhuw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmulhw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmulhw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - vpmulld %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - vpmulld (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmullw %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmullw (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpmuludq %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpmuludq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuldq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuldq (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhrsw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhrsw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhuw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 
- - - - vpmulhuw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmulhw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmulhw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - vpmulld %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - vpmulld (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmullw %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmullw (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vpmuludq %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vpmuludq (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpor %ymm0, %ymm1, %ymm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpor (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vpsadbw %ymm0, %ymm1, %ymm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s index 71ffbbbe13f..0da66b3e1a3 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-f16c.s @@ -45,15 +45,15 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 2.67 2.67 1.67 1.67 2.00 8.67 - 0.67 +# CHECK-NEXT: - - 4.00 4.00 1.67 1.67 2.00 6.00 - 0.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtph2ps %xmm0, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtph2ps (%rax), %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtps2ph $0, 
%ymm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.33 0.33 1.00 1.33 - 0.33 vcvtps2ph $0, %ymm0, (%rax) +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtph2ps %xmm0, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vcvtph2ps (%rax), %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %xmm0, (%rax) +# CHECK-NEXT: - - 0.50 0.50 - - - 1.00 - - vcvtps2ph $0, %ymm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.33 0.33 1.00 1.00 - 0.33 vcvtps2ph $0, %ymm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s index ade33fb1033..a3a405f3ac1 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-fma.s @@ -298,197 +298,197 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 
0.33 vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * 
vfmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub213ps (%rax), %ymm1, 
%ymm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ps 
%ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 
0.33 vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %xmm0, %xmm1, 
%xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 
* vfnmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ps %ymm0, %ymm1, 
%ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 11 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 4 0.33 vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 1 4 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 9 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resources: @@ -505,199 +505,199 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] 
[2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 64.00 64.00 48.00 48.00 - 64.00 - - +# CHECK-NEXT: - - 96.00 96.00 48.00 48.00 - - - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %xmm1, %xmm2 
-# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231pd 
%ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmaddsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 
0.50 0.50 - 0.33 - - vfmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsub231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 
0.33 - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfmsubadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - 
vfnmadd132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 
0.50 - 0.33 - - vfnmadd132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmadd231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmadd231ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231pd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231pd (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - 
- 0.33 0.33 - - - 0.33 - - vfnmsub132ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231sd %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231sd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub132ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub132ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub213ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub213ss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vfnmsub231ss %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vfnmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 
0.50 0.50 0.50 0.50 - - - - vfmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - vfmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub213ps 
%ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmaddsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - 
vfmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsub231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 
0.50 0.50 - - - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfmsubadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - 
vfnmadd231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmadd231ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - vfnmsub132pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231pd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2 +# 
CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231sd (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub132ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub213ss (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfnmsub231ss (%rax), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s index a89aba46903..9088a770530 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse1.s @@ -194,17 +194,17 @@ xorps (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 addss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * addss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andnps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andnps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpps $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpps $0, %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cmpps $0, (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpss $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpss $0, %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cmpss $0, (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 comiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * comiss (%rax), %xmm1 @@ -217,12 +217,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 
9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 9 1.00 * cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %ecx -# CHECK-NEXT: 2 6 1.00 cvtss2si %xmm0, %rcx +# CHECK-NEXT: 3 7 1.00 cvtss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %ecx # CHECK-NEXT: 3 11 1.00 * cvtss2si (%rax), %rcx # CHECK-NEXT: 2 5 1.00 cvttps2pi %xmm0, %mm2 # CHECK-NEXT: 2 9 0.50 * cvttps2pi (%rax), %mm2 -# CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %ecx +# CHECK-NEXT: 2 6 1.00 cvttss2si %xmm0, %ecx # CHECK-NEXT: 3 7 1.00 cvttss2si %xmm0, %rcx # CHECK-NEXT: 3 11 1.00 * cvttss2si (%rax), %ecx # CHECK-NEXT: 4 12 1.00 * cvttss2si (%rax), %rcx @@ -232,13 +232,13 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 2 16 3.00 * divss (%rax), %xmm2 # CHECK-NEXT: 3 7 1.00 * * * ldmxcsr (%rax) # CHECK-NEXT: 1 1 1.00 * * * maskmovq %mm0, %mm1 -# CHECK-NEXT: 1 4 0.33 maxps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * maxps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 maxss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxss (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * minss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movaps %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movaps %xmm0, (%rax) @@ -258,9 +258,9 @@ xorps (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movups %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movups %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * mulps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * mulss (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 orps %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * orps (%rax), %xmm2 @@ -306,9 +306,9 @@ xorps (%rax), %xmm2 
# CHECK-NEXT: 1 12 3.00 sqrtss %xmm0, %xmm2 # CHECK-NEXT: 2 17 3.00 * sqrtss (%rax), %xmm2 # CHECK-NEXT: 3 2 1.00 * * * stmxcsr (%rax) -# CHECK-NEXT: 1 4 0.33 subps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * subps (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subss %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subss %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * subss (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 ucomiss %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * ucomiss (%rax), %xmm1 @@ -333,25 +333,25 @@ xorps (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 26.00 65.50 18.50 32.00 32.00 8.00 45.50 0.50 3.00 +# CHECK-NEXT: - 26.00 65.83 25.83 32.00 32.00 8.00 37.83 0.50 3.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpps $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpps $0, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpss $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpss $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpps $0, %xmm0, 
%xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpps $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpss $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpss $0, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - comiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comiss (%rax), %xmm1 -# CHECK-NEXT: - - 2.00 - - - - - - - cvtpi2ps %mm0, %xmm2 +# CHECK-NEXT: - - - - - - - 2.00 - - cvtpi2ps %mm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - cvtpi2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pi (%rax), %mm2 @@ -359,15 +359,15 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - cvtsi2ssq %rcx, %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 0.50 0.50 - - - - cvtsi2ssl (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtss2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvtss2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtss2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttps2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2pi (%rax), %mm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttss2si %xmm0, %ecx # CHECK-NEXT: - - 1.33 0.33 - - - 1.33 - - cvttss2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttss2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttss2si (%rax), %ecx # CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 1.33 - - cvttss2si (%rax), %rcx # CHECK-NEXT: - 3.00 
1.00 - - - - - - - divps %xmm0, %xmm2 # CHECK-NEXT: - 5.00 1.00 - 0.50 0.50 - - - - divps (%rax), %xmm2 @@ -375,14 +375,14 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - divss (%rax), %xmm2 # CHECK-NEXT: - - 1.25 0.25 0.50 0.50 - 0.25 0.25 - ldmxcsr (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - maskmovq %mm0, %mm1 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxss (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movaps %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movaps %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movaps (%rax), %xmm2 @@ -401,10 +401,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movups %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movups %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movups (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulss %xmm0, %xmm2 -# 
CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orps (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pavgb %mm0, %mm2 @@ -449,10 +449,10 @@ xorps (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - sqrtss %xmm0, %xmm2 # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - sqrtss (%rax), %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 stmxcsr (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subps (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subss %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subss (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subss %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subss (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - ucomiss %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - ucomiss (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 1.00 - - unpckhps %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s index dddf1119dd4..d39f7c0dd9d 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse2.s @@ -402,24 +402,24 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addpd (%rax), %xmm2 
-# CHECK-NEXT: 1 4 0.33 addsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andnpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andnpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 andpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * andpd (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 * * * clflush (%rax) -# CHECK-NEXT: 1 4 0.33 cmppd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmppd $0, %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cmppd $0, (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cmpsd $0, %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cmpsd $0, %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cmpsd $0, (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 comisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * comisd (%rax), %xmm1 -# CHECK-NEXT: 2 5 1.00 cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2dq (%rax), %xmm2 @@ -427,9 +427,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2pi (%rax), %mm2 # CHECK-NEXT: 2 5 1.00 cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: 3 11 1.00 * cvtpd2ps (%rax), %xmm2 -# CHECK-NEXT: 2 5 1.00 cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtpi2pd %mm0, %xmm2 +# CHECK-NEXT: 2 9 0.50 * cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: 1 4 0.50 cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * cvtps2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * cvtps2pd (%rax), %xmm2 @@ -449,7 +449,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 3 11 1.00 * cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: 2 5 1.00 cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: 3 11 1.00 * cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: 
2 10 0.50 * cvttps2dq (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %ecx # CHECK-NEXT: 2 6 1.00 cvttsd2si %xmm0, %rcx @@ -461,13 +461,13 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 19 4.00 * divsd (%rax), %xmm2 # CHECK-NEXT: 2 2 0.50 * * * lfence # CHECK-NEXT: 2 1 1.00 * * * maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: 1 4 0.33 maxpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * maxpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 maxsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 maxsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * maxsd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * minpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 minsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 minsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * minsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movapd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movapd %xmm0, (%rax) @@ -504,9 +504,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 movupd %xmm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * movupd %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * mulpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 mulsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 mulsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 orpd %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * orpd (%rax), %xmm2 @@ -555,7 +555,7 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx -# CHECK-NEXT: 1 4 0.33 pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmaddwd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmaddwd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2 @@ -566,15 +566,15 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 
pmovmskb %xmm0, %ecx -# CHECK-NEXT: 1 4 0.33 pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhuw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhuw (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmulhw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhw (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmullw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmullw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmullw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmuludq %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmuludq (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmuludq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmuludq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmuludq (%rax), %xmm2 # CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2 # CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2 @@ -654,9 +654,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 2 24 6.00 * sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 18 6.00 sqrtsd %xmm0, %xmm2 # CHECK-NEXT: 2 23 6.00 * sqrtsd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * subpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 subsd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 subsd %xmm0, %xmm2 # CHECK-NEXT: 2 9 0.50 * subsd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 ucomisd %xmm0, %xmm1 # CHECK-NEXT: 2 7 1.00 * ucomisd (%rax), %xmm1 @@ -681,28 +681,28 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 38.00 101.08 69.08 62.67 62.67 14.00 112.08 1.75 4.67 +# CHECK-NEXT: - 38.00 103.08 82.08 62.67 62.67 14.00 94.08 1.75 4.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addpd %xmm0, %xmm2 +# 
CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andnpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andnpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - andpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - andpd (%rax), %xmm2 # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 1.25 - clflush (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmppd $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmppd $0, (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cmpsd $0, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cmpsd $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmppd $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmppd $0, (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cmpsd $0, %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cmpsd $0, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - comisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - comisd (%rax), %xmm1 -# CHECK-NEXT: - - 1.00 - - - - 1.00 - - cvtdq2pd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2pd %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtdq2pd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtdq2ps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtdq2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtdq2ps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2dq (%rax), %xmm2 @@ -710,16 +710,16 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2pi (%rax), %mm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtpd2ps %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtpd2ps (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 - - 
- - 1.00 - - cvtpi2pd %mm0, %xmm2 -# CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - cvtpi2pd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvtps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtpi2pd %mm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - cvtpi2pd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvtps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtps2pd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvtps2pd (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvtsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvtsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvtsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvtsd2si (%rax), %rcx # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsd2ss %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvtsd2ss (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvtsi2sdl %ecx, %xmm2 @@ -732,26 +732,26 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2dq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - cvttpd2pi %xmm0, %mm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 1.33 - - cvttpd2pi (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - cvttps2dq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - cvttps2dq %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - cvttps2dq (%rax), %xmm2 -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %ecx -# CHECK-NEXT: - - 1.33 0.33 - - - 0.33 - - cvttsd2si %xmm0, %rcx -# CHECK-NEXT: - - 1.33 0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %ecx -# CHECK-NEXT: - - 1.33 
0.33 0.50 0.50 - 0.33 - - cvttsd2si (%rax), %rcx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %ecx +# CHECK-NEXT: - - 1.00 1.00 - - - - - - cvttsd2si %xmm0, %rcx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %ecx +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - cvttsd2si (%rax), %rcx # CHECK-NEXT: - 3.00 1.00 - - - - - - - divpd %xmm0, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divpd (%rax), %xmm2 # CHECK-NEXT: - 3.00 1.00 - - - - - - - divsd %xmm0, %xmm2 # CHECK-NEXT: - 4.00 1.00 - 0.50 0.50 - - - - divsd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - 0.50 0.50 - lfence # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 maskmovdqu %xmm0, %xmm1 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - maxsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - maxsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - minsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - minsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - maxsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - maxsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - minsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - minsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movapd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movapd (%rax), %xmm2 @@ -787,10 +787,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 
- - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 movupd %xmm0, (%rax) # CHECK-NEXT: - - - - 0.50 0.50 - - - - movupd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - mulsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - mulsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - mulsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - mulsd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - orpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - orpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - packssdw %xmm0, %xmm2 @@ -838,8 +838,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pcmpgtw %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pcmpgtw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrw $1, %xmm0, %ecx -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddwd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddwd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddwd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddwd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxsw %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaxsw (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaxub %xmm0, %xmm2 @@ -849,16 +849,16 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - 0.50 0.50 - - - - - - pminub %xmm0, %xmm2 # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pminub (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmovmskb %xmm0, %ecx -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhuw (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 
0.33 0.50 0.50 - 0.33 - - pmulhw (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmullw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmullw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhuw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhuw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmullw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmullw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmuludq %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmuludq (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuludq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuludq (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuludq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmuludq (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - por %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - por (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - psadbw %xmm0, %xmm2 @@ -937,10 +937,10 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtpd (%rax), %xmm2 # CHECK-NEXT: - 6.00 1.00 - - - - - - - sqrtsd %xmm0, %xmm2 # CHECK-NEXT: - 6.00 1.00 - 0.50 0.50 - - - - sqrtsd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - subsd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - subsd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - subsd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - subsd (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - ucomisd %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - 
ucomisd (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 1.00 - - unpckhpd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s index 5367f880fc8..4fac1f6a504 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse3.s @@ -39,9 +39,9 @@ movsldup (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.33 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsubpd %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 addsubps %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 addsubps %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * addsubps (%rax), %xmm2 # CHECK-NEXT: 3 6 2.00 haddpd %xmm0, %xmm2 # CHECK-NEXT: 4 12 2.00 * haddpd (%rax), %xmm2 @@ -73,14 +73,14 @@ movsldup (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 4.00 4.00 5.00 5.00 - 23.00 - - +# CHECK-NEXT: - - 4.67 4.67 5.00 5.00 - 21.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubpd (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - addsubps %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - addsubps (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubpd (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - addsubps %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - addsubps (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddpd %xmm0, %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - haddpd (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 - - - 2.33 - - haddps %xmm0, %xmm2 diff --git 
a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s index 0d39529338e..d49f33cce15 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s @@ -189,8 +189,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx # CHECK-NEXT: 3 2 1.00 * pextrq $1, %xmm0, (%rax) # CHECK-NEXT: 3 2 1.00 * pextrw $1, %xmm0, (%rax) -# CHECK-NEXT: 1 4 0.33 phminposuw %xmm0, %xmm2 -# CHECK-NEXT: 2 10 0.50 * phminposuw (%rax), %xmm2 +# CHECK-NEXT: 1 4 1.00 phminposuw %xmm0, %xmm2 +# CHECK-NEXT: 2 10 1.00 * phminposuw (%rax), %xmm2 # CHECK-NEXT: 2 2 2.00 pinsrb $1, %eax, %xmm1 # CHECK-NEXT: 2 6 1.00 * pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: 2 2 2.00 pinsrd $1, %eax, %xmm1 @@ -237,20 +237,20 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 2 6 1.00 * pmovzxwd (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: 2 6 1.00 * pmovzxwq (%rax), %xmm2 -# CHECK-NEXT: 1 4 0.33 pmuldq %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmuldq %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmuldq (%rax), %xmm2 -# CHECK-NEXT: 2 10 0.67 pmulld %xmm0, %xmm2 -# CHECK-NEXT: 3 16 0.67 * pmulld (%rax), %xmm2 +# CHECK-NEXT: 2 10 1.00 pmulld %xmm0, %xmm2 +# CHECK-NEXT: 3 16 1.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 3 9 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 2 8 0.67 roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 2 8 0.67 roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: 3 14 0.67 * roundss $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundps $1, %xmm0, %xmm2 +# 
CHECK-NEXT: 3 14 1.00 * roundps $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: 2 8 1.00 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 3 14 1.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: # CHECK-NEXT: [0] - SKXDivider @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 31.67 25.67 23.67 23.67 5.00 74.67 - 1.67 +# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -304,8 +304,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - pextrq $1, %xmm0, %rcx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrq $1, %xmm0, (%rax) # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 pextrw $1, %xmm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - phminposuw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - phminposuw (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 - - - - - - - phminposuw %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - phminposuw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 2.00 - - pinsrb $1, %eax, %xmm1 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pinsrb $1, (%rax), %xmm1 # CHECK-NEXT: - - - - - - - 2.00 - - pinsrd $1, %eax, %xmm1 @@ -352,17 +352,17 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwd (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - pmovzxwq %xmm0, %xmm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pmovzxwq (%rax), %xmm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmuldq %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmuldq (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - pmulld %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - pmulld (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmuldq %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 
0.50 0.50 - - - - pmuldq (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - pmulld %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundpd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundps $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundsd $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - 0.67 0.67 - - - 0.67 - - roundss $1, %xmm0, %xmm2 -# CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - roundss $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 +# CHECK-NEXT: - - 1.00 1.00 - - - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s index b4db1a2b649..a213d059e60 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-ssse3.s @@ -148,11 +148,11 @@ psignw (%rax), %xmm2 # CHECK-NEXT: 4 9 2.00 * phsubw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmaddubsw %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmaddubsw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmaddubsw 
%xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmaddubsw (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 pmulhrsw %mm0, %mm2 # CHECK-NEXT: 2 9 1.00 * pmulhrsw (%rax), %mm2 -# CHECK-NEXT: 1 4 0.33 pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: 1 4 0.50 pmulhrsw %xmm0, %xmm2 # CHECK-NEXT: 2 10 0.50 * pmulhrsw (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 pshufb %mm0, %mm2 # CHECK-NEXT: 2 6 1.00 * pshufb (%rax), %mm2 @@ -185,7 +185,7 @@ psignw (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 30.00 12.00 16.00 16.00 - 70.00 - - +# CHECK-NEXT: - - 30.67 12.67 16.00 16.00 - 68.67 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -231,12 +231,12 @@ psignw (%rax), %xmm2 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 2.33 - - phsubw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmaddubsw %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmaddubsw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmaddubsw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmaddubsw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmaddubsw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmaddubsw (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - pmulhrsw %mm0, %mm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - pmulhrsw (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - pmulhrsw %xmm0, %xmm2 -# CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - pmulhrsw (%rax), %xmm2 +# CHECK-NEXT: - - 0.50 0.50 - - - - - - pmulhrsw %xmm0, %xmm2 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - pmulhrsw (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 - - pshufb %mm0, %mm2 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - pshufb (%rax), %mm2 # CHECK-NEXT: - - - - - - - 1.00 - - pshufb %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s index 11d6a569605..fd5823e01a3 100644 
--- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-x86_64.s @@ -822,13 +822,13 @@ xorq (%rax), %rdi # CHECK-NEXT: 3 7 1.00 * * negl (%rax) # CHECK-NEXT: 1 1 0.25 negq %rcx # CHECK-NEXT: 3 7 1.00 * * negq (%r10) -# CHECK-NEXT: 1 1 0.25 nop -# CHECK-NEXT: 1 1 0.25 nopw %di -# CHECK-NEXT: 1 1 0.25 nopw (%rcx) -# CHECK-NEXT: 1 1 0.25 nopl %esi -# CHECK-NEXT: 1 1 0.25 nopl (%r8) -# CHECK-NEXT: 1 1 0.25 nopq %rdx -# CHECK-NEXT: 1 1 0.25 nopq (%r9) +# CHECK-NEXT: 1 1 0.17 nop +# CHECK-NEXT: 1 1 0.17 nopw %di +# CHECK-NEXT: 1 1 0.17 nopw (%rcx) +# CHECK-NEXT: 1 1 0.17 nopl %esi +# CHECK-NEXT: 1 1 0.17 nopl (%r8) +# CHECK-NEXT: 1 1 0.17 nopq %rdx +# CHECK-NEXT: 1 1 0.17 nopq (%r9) # CHECK-NEXT: 1 1 0.25 notb %dil # CHECK-NEXT: 3 7 1.00 * * notb (%r8) # CHECK-NEXT: 1 1 0.25 notw %si @@ -1164,7 +1164,7 @@ xorq (%rax), %rdi # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: 60.00 - 431.50 225.50 202.00 202.00 167.00 186.00 416.00 69.00 +# CHECK-NEXT: 60.00 - 429.75 223.75 202.00 202.00 167.00 184.25 414.25 69.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1381,13 +1381,13 @@ xorq (%rax), %rdi # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negl (%rax) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - negq %rcx # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 negq (%r10) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nop -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw %di -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopw (%rcx) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl %esi -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopl (%r8) -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq %rdx -# CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - nopq (%r9) +# CHECK-NEXT: - - - - - - - - - - nop +# CHECK-NEXT: - - - - - - - - - - nopw %di +# CHECK-NEXT: - - 
- - - - - - - - nopw (%rcx) +# CHECK-NEXT: - - - - - - - - - - nopl %esi +# CHECK-NEXT: - - - - - - - - - - nopl (%r8) +# CHECK-NEXT: - - - - - - - - - - nopq %rdx +# CHECK-NEXT: - - - - - - - - - - nopq (%r9) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notb %dil # CHECK-NEXT: - - 0.25 0.25 0.83 0.83 1.00 0.25 0.25 0.33 notb (%r8) # CHECK-NEXT: - - 0.25 0.25 - - - 0.25 0.25 - notw %si |