1 files changed, 42 insertions, 84 deletions
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 0526a3023a4..23ef6bfd86f 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -77,15 +77,21 @@ def : ReadAdvance<ReadAfterLd, 5>;
 // This multiclass defines the resource usage for variants with and without
 // folded loads.
 multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
-                          ProcResourceKind ExePort,
-                          int Lat> {
-  // Register variant is using a single cycle on ExePort.
-  def : WriteRes<SchedRW, [ExePort]> { let Latency = Lat; }
+                          list<ProcResourceKind> ExePorts,
+                          int Lat, list<int> Res = [1], int UOps = 1> {
+  // Register variant: Res[i] cycles on ExePorts[i] (keep list sizes equal).
+  def : WriteRes<SchedRW, ExePorts> {
+    let Latency = Lat;
+    let ResourceCycles = Res;
+    let NumMicroOps = UOps;
+  }
 
   // Memory variant also uses a cycle on port 2/3 and adds 5 cycles to the
   // latency.
-  def : WriteRes<SchedRW.Folded, [SKLPort23, ExePort]> {
-     let Latency = !add(Lat, 5);
+  def : WriteRes<SchedRW.Folded, !listconcat([SKLPort23], ExePorts)> {
+    let Latency = !add(Lat, 5);
+    let ResourceCycles = !listconcat([1], Res);
+    let NumMicroOps = UOps;
   }
 }
 
@@ -94,8 +100,8 @@ multiclass SKLWriteResPair<X86FoldableSchedWrite SchedRW,
 def : WriteRes<WriteRMW, [SKLPort4]>;
 
 // Arithmetic.
-defm : SKLWriteResPair<WriteALU,   SKLPort0156, 1>; // Simple integer ALU op.
-defm : SKLWriteResPair<WriteIMul,  SKLPort1,   3>; // Integer multiplication.
+defm : SKLWriteResPair<WriteALU,  [SKLPort0156], 1>; // Simple integer ALU op.
+defm : SKLWriteResPair<WriteIMul, [SKLPort1],    3>; // Integer multiplication.
 def : WriteRes<WriteIMulH, []> { let Latency = 3; } // Integer multiplication, high part.
 def SKLDivider : ProcResource<1>; // Integer division issued on port 0.     
 def : WriteRes<WriteIDiv, [SKLPort0, SKLDivider]> { // Integer division.
@@ -110,7 +116,7 @@ def : WriteRes<WriteIDivLd, [SKLPort23, SKLPort0, SKLDivider]> {
 def : WriteRes<WriteLEA, [SKLPort15]>; // LEA instructions can't fold loads.
 
 // Integer shifts and rotates.
-defm : SKLWriteResPair<WriteShift, SKLPort06,  1>;
+defm : SKLWriteResPair<WriteShift, [SKLPort06], 1>;
 
 // Loads, stores, and moves, not folded with other operations.
 def : WriteRes<WriteLoad,  [SKLPort23]> { let Latency = 5; }
@@ -123,30 +129,23 @@ def : WriteRes<WriteZero,  []>;
 
 // Branches don't produce values, so they have no latency, but they still
 // consume resources. Indirect branches can fold loads.
-defm : SKLWriteResPair<WriteJump,  SKLPort06,   1>;
+defm : SKLWriteResPair<WriteJump, [SKLPort06], 1>;
 
 // Floating point. This covers both scalar and vector operations.
 def  : WriteRes<WriteFLoad,         [SKLPort23]> { let Latency = 6; }
 def  : WriteRes<WriteFStore,        [SKLPort237, SKLPort4]>;
 def  : WriteRes<WriteFMove,         [SKLPort015]>;
 
-defm : SKLWriteResPair<WriteFAdd,   SKLPort1, 3>; // Floating point add/sub/compare.
-defm : SKLWriteResPair<WriteFMul,   SKLPort0, 5>; // Floating point multiplication.
-defm : SKLWriteResPair<WriteFDiv,   SKLPort0, 12>; // 10-14 cycles. // Floating point division.
-defm : SKLWriteResPair<WriteFSqrt,  SKLPort0, 15>; // Floating point square root.
-defm : SKLWriteResPair<WriteFRcp,   SKLPort0, 5>; // Floating point reciprocal estimate.
-defm : SKLWriteResPair<WriteFRsqrt, SKLPort0, 5>; // Floating point reciprocal square root estimate.
-defm : SKLWriteResPair<WriteFMA,    SKLPort01, 4>; // Fused Multiply Add.
-defm : SKLWriteResPair<WriteFShuffle,  SKLPort5,  1>; // Floating point vector shuffles.
-defm : SKLWriteResPair<WriteFBlend,  SKLPort015,  1>; // Floating point vector blends.
-def : WriteRes<WriteFVarBlend, [SKLPort5]> { // Fp vector variable blends.	       
-  let Latency = 2;
-  let ResourceCycles = [2];
-} 
-def : WriteRes<WriteFVarBlendLd, [SKLPort5, SKLPort23]> {
-  let Latency = 6;
-  let ResourceCycles = [2, 1];
-}
+defm : SKLWriteResPair<WriteFAdd,      [SKLPort1],   3>;  // Floating point add/sub/compare.
+defm : SKLWriteResPair<WriteFMul,      [SKLPort0],   5>;  // Floating point multiplication.
+defm : SKLWriteResPair<WriteFDiv,      [SKLPort0],   12>; // Floating point division (10-14 cycles).
+defm : SKLWriteResPair<WriteFSqrt,     [SKLPort0],   15>; // Floating point square root.
+defm : SKLWriteResPair<WriteFRcp,      [SKLPort0],   5>;  // Floating point reciprocal estimate.
+defm : SKLWriteResPair<WriteFRsqrt,    [SKLPort0],   5>;  // Floating point reciprocal square root estimate.
+defm : SKLWriteResPair<WriteFMA,       [SKLPort01],  4>;  // Fused Multiply Add.
+defm : SKLWriteResPair<WriteFShuffle,  [SKLPort5],   1>;  // Floating point vector shuffles.
+defm : SKLWriteResPair<WriteFBlend,    [SKLPort015], 1>;  // Floating point vector blends.
+defm : SKLWriteResPair<WriteFVarBlend, [SKLPort5],   2, [2]>; // Fp vector variable blends.
 
 // FMA Scheduling helper class.
 // class FMASC { X86FoldableSchedWrite Sched = WriteFAdd; }
@@ -156,38 +155,22 @@ def  : WriteRes<WriteVecLoad,         [SKLPort23]> { let Latency = 6; }
 def  : WriteRes<WriteVecStore,        [SKLPort237, SKLPort4]>;
 def  : WriteRes<WriteVecMove,         [SKLPort015]>;
 
-defm : SKLWriteResPair<WriteVecALU,   SKLPort15,  1>; // Vector integer ALU op, no logicals.
-defm : SKLWriteResPair<WriteVecShift, SKLPort0,  1>; // Vector integer shifts.
-defm : SKLWriteResPair<WriteVecIMul,  SKLPort0,   5>; // Vector integer multiply.
-defm : SKLWriteResPair<WriteShuffle,  SKLPort5,  1>; // Vector shuffles.
-defm : SKLWriteResPair<WriteBlend,  SKLPort15,  1>; // Vector blends.
-
-def : WriteRes<WriteVarBlend, [SKLPort5]> { // Vector variable blends.
-  let Latency = 2;
-  let ResourceCycles = [2];
-}
-def : WriteRes<WriteVarBlendLd, [SKLPort5, SKLPort23]> {
-  let Latency = 6;
-  let ResourceCycles = [2, 1];
-}
-
-def : WriteRes<WriteMPSAD, [SKLPort0, SKLPort5]> { // Vector MPSAD.     
-  let Latency = 6;
-  let ResourceCycles = [1, 2];
-}
-def : WriteRes<WriteMPSADLd, [SKLPort23, SKLPort0, SKLPort5]> {
-  let Latency = 6;
-  let ResourceCycles = [1, 1, 2];
-}
+defm : SKLWriteResPair<WriteVecALU,   [SKLPort15], 1>; // Vector integer ALU op, no logicals.
+defm : SKLWriteResPair<WriteVecShift, [SKLPort0],  1>; // Vector integer shifts.
+defm : SKLWriteResPair<WriteVecIMul,  [SKLPort0],  5>; // Vector integer multiply.
+defm : SKLWriteResPair<WriteShuffle,  [SKLPort5],  1>; // Vector shuffles.
+defm : SKLWriteResPair<WriteBlend,    [SKLPort15], 1>; // Vector blends.
+defm : SKLWriteResPair<WriteVarBlend, [SKLPort5],  2, [2]>; // Vector variable blends.
+defm : SKLWriteResPair<WriteMPSAD,    [SKLPort0, SKLPort5], 6, [1, 2]>; // Vector MPSAD.
 
 // Vector bitwise operations.
 // These are often used on both floating point and integer vectors.
-defm : SKLWriteResPair<WriteVecLogic, SKLPort015, 1>; // Vector and/or/xor.
+defm : SKLWriteResPair<WriteVecLogic, [SKLPort015], 1>; // Vector and/or/xor (register and folded-load forms).
 
 // Conversion between integer and float.
-defm : SKLWriteResPair<WriteCvtF2I, SKLPort1, 3>; // Float -> Integer.
-defm : SKLWriteResPair<WriteCvtI2F, SKLPort1, 4>; // Integer -> Float.
-defm : SKLWriteResPair<WriteCvtF2F, SKLPort1, 3>; // Float -> Float size conversion.
+defm : SKLWriteResPair<WriteCvtF2I, [SKLPort1], 3>; // Float -> Integer, 3-cycle register latency.
+defm : SKLWriteResPair<WriteCvtI2F, [SKLPort1], 4>; // Integer -> Float, 4-cycle register latency.
+defm : SKLWriteResPair<WriteCvtF2F, [SKLPort1], 3>; // Float -> Float size conversion, 3-cycle register latency.
 
 // Strings instructions.
 // Packed Compare Implicit Length Strings, Return Mask
@@ -268,16 +251,9 @@ def : WriteRes<WriteCLMulLd, [SKLPort0, SKLPort5, SKLPort23]> {
 def : WriteRes<WriteSystem,     [SKLPort0156]> { let Latency = 100; } // def WriteSystem : SchedWrite;
 
 // AVX2.
-defm : SKLWriteResPair<WriteFShuffle256,  SKLPort5,  3>; // Fp 256-bit width vector shuffles.
-defm : SKLWriteResPair<WriteShuffle256,  SKLPort5,  3>;  // 256-bit width vector shuffles.
-def : WriteRes<WriteVarVecShift, [SKLPort0, SKLPort5]> { // Variable vector shifts.
-  let Latency = 2;
-  let ResourceCycles = [2, 1];
-}
-def : WriteRes<WriteVarVecShiftLd, [SKLPort0, SKLPort5, SKLPort23]> {
-  let Latency = 6;
-  let ResourceCycles = [2, 1, 1];
-}
+defm : SKLWriteResPair<WriteFShuffle256, [SKLPort5], 3>; // Fp 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteShuffle256,  [SKLPort5], 3>; // 256-bit width vector shuffles.
+defm : SKLWriteResPair<WriteVarVecShift, [SKLPort0, SKLPort5], 2, [2, 1]>; // Variable vector shifts.
 
 // Old microcoded instructions that nobody use.
 def : WriteRes<WriteMicrocoded, [SKLPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
@@ -291,27 +267,9 @@ def : WriteRes<WriteNop, []>;
 ////////////////////////////////////////////////////////////////////////////////
 // Horizontal add/sub  instructions.
 ////////////////////////////////////////////////////////////////////////////////
-// HADD, HSUB PS/PD
-// x,x / v,v,v.
-def : WriteRes<WriteFHAdd, [SKLPort1]> {
-  let Latency = 3;
-}
 
-// x,m / v,v,m.
-def : WriteRes<WriteFHAddLd, [SKLPort1, SKLPort23]> {
-  let Latency = 7;
-  let ResourceCycles = [1, 1];
-}
-
-// PHADD|PHSUB (S) W/D.
-// v <- v,v.
-def : WriteRes<WritePHAdd, [SKLPort15]>;
-
-// v <- v,m.
-def : WriteRes<WritePHAddLd, [SKLPort15, SKLPort23]> {
-  let Latency = 5;
-  let ResourceCycles = [1, 1];
-}
+defm : SKLWriteResPair<WriteFHAdd, [SKLPort1],  3>; // HADD, HSUB PS/PD.
+defm : SKLWriteResPair<WritePHAdd, [SKLPort15], 1>; // PHADD|PHSUB (S) W/D.
 
 // Remaining instrs.