summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-11-06 18:59:45 +0000
committer: Simon Pilgrim <llvm-dev@redking.me.uk> 2019-11-06 19:08:15 +0000
commit: ad70d5f39ae99d9f5be582ad8979830f588e6802 (patch)
tree: b6ed036f79b2e9bc74f5d35054b6ebedc864118d
parent: ca7f5becf9b71c23c8980d3a7280e4408e28bad2 (diff)
download: bcm5719-llvm-ad70d5f39ae99d9f5be582ad8979830f588e6802.tar.gz
download: bcm5719-llvm-ad70d5f39ae99d9f5be582ad8979830f588e6802.zip
[X86] Fix SLM v2f64 ADD/MUL + FP BLEND/HADD instruction schedules
Noticed while fixing the reduction costs for D59710 - the SLM model doesn't account for the poor throughput of v2f64/v2i64 ops.
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleSLM.td 14
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s 26
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s 42
-rw-r--r-- llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s 18
4 files changed, 50 insertions, 50 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td
index 8e3ce721f1a..84aac01ab38 100644
--- a/llvm/lib/Target/X86/X86ScheduleSLM.td
+++ b/llvm/lib/Target/X86/X86ScheduleSLM.td
@@ -202,8 +202,8 @@ defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>;
defm : X86WriteResPairUnsupported<WriteFAddZ>;
defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>;
-defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>;
+defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 4, [2]>;
+defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 4, [2]>;
defm : X86WriteResPairUnsupported<WriteFAdd64Z>;
defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>;
defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>;
@@ -219,8 +219,8 @@ defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>
defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
defm : X86WriteResPairUnsupported<WriteFMulZ>;
defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
-defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>;
+defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
+defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>;
defm : X86WriteResPairUnsupported<WriteFMul64Z>;
defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>;
defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>;
@@ -380,8 +380,8 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> {
// Horizontal add/sub instructions.
////////////////////////////////////////////////////////////////////////////////
-defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>;
-defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>;
+defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>;
+defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>;
defm : X86WriteResPairUnsupported<WriteFHAddZ>;
defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>;
defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>;
@@ -486,7 +486,7 @@ defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>;
defm : X86WriteResPairUnsupported<WriteVarBlendY>;
defm : X86WriteResPairUnsupported<WriteVarBlendZ>;
-defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>;
+defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>;
defm : X86WriteResPairUnsupported<WriteFVarBlendY>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
defm : X86WriteResPairUnsupported<WriteFShuffle256>;
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
index 477a9ce97ec..92f33a20839 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s
@@ -407,8 +407,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * addpd (%rax), %xmm2
+# CHECK-NEXT: 1 4 2.00 addpd %xmm0, %xmm2
+# CHECK-NEXT: 1 7 2.00 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2
@@ -510,8 +510,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 * movupd (%rax), %xmm2
-# CHECK-NEXT: 1 5 2.00 mulpd %xmm0, %xmm2
-# CHECK-NEXT: 1 8 2.00 * mulpd (%rax), %xmm2
+# CHECK-NEXT: 1 7 4.00 mulpd %xmm0, %xmm2
+# CHECK-NEXT: 1 10 4.00 * mulpd (%rax), %xmm2
# CHECK-NEXT: 1 5 2.00 mulsd %xmm0, %xmm2
# CHECK-NEXT: 1 8 2.00 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2
@@ -662,8 +662,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * subpd (%rax), %xmm2
+# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2
+# CHECK-NEXT: 1 7 2.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * subsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 ucomisd %xmm0, %xmm1
@@ -687,12 +687,12 @@ xorpd (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - 412.00 8.00 152.50 86.50 3.00 3.00 134.00
+# CHECK-NEXT: - 412.00 12.00 152.50 90.50 3.00 3.00 134.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
-# CHECK-NEXT: - - - - 1.00 - - - addpd %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 addpd (%rax), %xmm2
+# CHECK-NEXT: - - - - 2.00 - - - addpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 2.00 - - 1.00 addpd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - addsd %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 addsd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - andnpd %xmm0, %xmm2
@@ -794,8 +794,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - movupd %xmm0, %xmm2
# CHECK-NEXT: - - - - - - - 1.00 movupd %xmm0, (%rax)
# CHECK-NEXT: - - - - - - - 1.00 movupd (%rax), %xmm2
-# CHECK-NEXT: - - 2.00 1.00 - - - - mulpd %xmm0, %xmm2
-# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulpd (%rax), %xmm2
+# CHECK-NEXT: - - 4.00 1.00 - - - - mulpd %xmm0, %xmm2
+# CHECK-NEXT: - - 4.00 1.00 - - - 1.00 mulpd (%rax), %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - - mulsd %xmm0, %xmm2
# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulsd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 0.50 - - - orpd %xmm0, %xmm2
@@ -946,8 +946,8 @@ xorpd (%rax), %xmm2
# CHECK-NEXT: - 70.00 - 1.00 - - - 1.00 sqrtpd (%rax), %xmm2
# CHECK-NEXT: - 35.00 - 1.00 - - - - sqrtsd %xmm0, %xmm2
# CHECK-NEXT: - 35.00 - 1.00 - - - 1.00 sqrtsd (%rax), %xmm2
-# CHECK-NEXT: - - - - 1.00 - - - subpd %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 subpd (%rax), %xmm2
+# CHECK-NEXT: - - - - 2.00 - - - subpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 2.00 - - 1.00 subpd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - subsd %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 subsd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - ucomisd %xmm0, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
index 484c353b07b..23949737b3c 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s
@@ -43,18 +43,18 @@ mwait
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * addsubpd (%rax), %xmm2
+# CHECK-NEXT: 1 4 2.00 addsubpd %xmm0, %xmm2
+# CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * haddpd (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * haddps (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * hsubpd (%rax), %xmm2
-# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2
-# CHECK-NEXT: 1 6 1.00 * hsubps (%rax), %xmm2
+# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2
+# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2
+# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2
+# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2
+# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2
+# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2
+# CHECK-NEXT: 4 9 3.00 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 100 1.00 U monitor
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
@@ -77,22 +77,22 @@ mwait
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 16.00 12.00 - - 10.00
+# CHECK-NEXT: - - - 32.00 30.00 - - 10.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
-# CHECK-NEXT: - - - - 1.00 - - - addsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubpd (%rax), %xmm2
+# CHECK-NEXT: - - - - 2.00 - - - addsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - - haddpd %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddpd (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - - haddps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddps (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - - hsubpd %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubpd (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - - hsubps %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubps (%rax), %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2
+# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - monitor
# CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2
diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
index ce74a7625e7..0dc83a0fb62 100644
--- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
+++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s
@@ -159,10 +159,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: 1 4 1.00 * blendpd $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 blendps $11, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * blendps $11, (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 blendvpd %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * blendvpd %xmm0, (%rax), %xmm2
-# CHECK-NEXT: 1 1 1.00 blendvps %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: 1 4 1.00 * blendvps %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 3 4 4.00 blendvpd %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2
+# CHECK-NEXT: 3 4 4.00 blendvps %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
-# CHECK-NEXT: - - - 73.00 22.00 - - 54.00
+# CHECK-NEXT: - - - 85.00 22.00 - - 54.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -272,10 +272,10 @@ roundss $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 blendpd $11, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - blendps $11, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - 1.00 blendps $11, (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - blendvpd %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
-# CHECK-NEXT: - - - 1.00 - - - - blendvps %xmm0, %xmm0, %xmm2
-# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - - 4.00 - - - - blendvpd %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2
+# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2
+# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2
OpenPOWER on IntegriCloud