diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-11-06 18:59:45 +0000 |
---|---|---|
committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-11-06 19:08:15 +0000 |
commit | ad70d5f39ae99d9f5be582ad8979830f588e6802 (patch) | |
tree | b6ed036f79b2e9bc74f5d35054b6ebedc864118d | |
parent | ca7f5becf9b71c23c8980d3a7280e4408e28bad2 (diff) | |
download | bcm5719-llvm-ad70d5f39ae99d9f5be582ad8979830f588e6802.tar.gz bcm5719-llvm-ad70d5f39ae99d9f5be582ad8979830f588e6802.zip |
[X86] Fix SLM v2f64 ADD/MUL + FP BLEND/HADD instruction schedules
Noticed while fixing the reduction costs for D59710 - the SLM model doesn't account for the poor throughput of v2f64/v2i64 ops.
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/Target/X86/X86ScheduleSLM.td | 14 |
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s | 26 |
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s | 42 |
| -rw-r--r-- | llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s | 18 |

4 files changed, 50 insertions, 50 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index 8e3ce721f1a..84aac01ab38 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -202,8 +202,8 @@ defm : SLMWriteResPair<WriteFAddX, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFAddY, [SLM_FPC_RSV1], 3>; defm : X86WriteResPairUnsupported<WriteFAddZ>; defm : SLMWriteResPair<WriteFAdd64, [SLM_FPC_RSV1], 3>; -defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 3>; -defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 3>; +defm : SLMWriteResPair<WriteFAdd64X, [SLM_FPC_RSV1], 4, [2]>; +defm : SLMWriteResPair<WriteFAdd64Y, [SLM_FPC_RSV1], 4, [2]>; defm : X86WriteResPairUnsupported<WriteFAdd64Z>; defm : SLMWriteResPair<WriteFCmp, [SLM_FPC_RSV1], 3>; defm : SLMWriteResPair<WriteFCmpX, [SLM_FPC_RSV1], 3>; @@ -219,8 +219,8 @@ defm : SLMWriteResPair<WriteFMulX, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]> defm : SLMWriteResPair<WriteFMulY, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; defm : X86WriteResPairUnsupported<WriteFMulZ>; defm : SLMWriteResPair<WriteFMul64, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; -defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; -defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 5, [1,2]>; +defm : SLMWriteResPair<WriteFMul64X, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>; +defm : SLMWriteResPair<WriteFMul64Y, [SLM_FPC_RSV0, SLMFPMultiplier], 7, [1,4]>; defm : X86WriteResPairUnsupported<WriteFMul64Z>; defm : SLMWriteResPair<WriteFDiv, [SLM_FPC_RSV0, SLMFPDivider], 19, [1,17]>; defm : SLMWriteResPair<WriteFDivX, [SLM_FPC_RSV0, SLMFPDivider], 39, [1,39]>; @@ -380,8 +380,8 @@ def : WriteRes<WriteVecExtractSt, [SLM_FPC_RSV0, SLM_MEC_RSV]> { // Horizontal add/sub instructions. 
//////////////////////////////////////////////////////////////////////////////// -defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 3, [2]>; -defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 3, [2]>; +defm : SLMWriteResPair<WriteFHAdd, [SLM_FPC_RSV01], 6, [6], 4>; +defm : SLMWriteResPair<WriteFHAddY, [SLM_FPC_RSV01], 6, [6], 4>; defm : X86WriteResPairUnsupported<WriteFHAddZ>; defm : SLMWriteResPair<WritePHAdd, [SLM_FPC_RSV01], 1>; defm : SLMWriteResPair<WritePHAddX, [SLM_FPC_RSV01], 1>; @@ -486,7 +486,7 @@ defm : X86WriteResPairUnsupported<WriteFBlendZ>; defm : SLMWriteResPair<WriteVarBlend, [SLM_FPC_RSV0], 1>; defm : X86WriteResPairUnsupported<WriteVarBlendY>; defm : X86WriteResPairUnsupported<WriteVarBlendZ>; -defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 1>; +defm : SLMWriteResPair<WriteFVarBlend, [SLM_FPC_RSV0], 4, [4], 3>; defm : X86WriteResPairUnsupported<WriteFVarBlendY>; defm : X86WriteResPairUnsupported<WriteFVarBlendZ>; defm : X86WriteResPairUnsupported<WriteFShuffle256>; diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s index 477a9ce97ec..92f33a20839 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s @@ -407,8 +407,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * addpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 addpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * addpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * addsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 andnpd %xmm0, %xmm2 @@ -510,8 +510,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movupd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 * movupd (%rax), %xmm2 -# CHECK-NEXT: 1 5 2.00 mulpd %xmm0, %xmm2 -# CHECK-NEXT: 1 8 2.00 * mulpd (%rax), %xmm2 +# CHECK-NEXT: 1 7 4.00 
mulpd %xmm0, %xmm2 +# CHECK-NEXT: 1 10 4.00 * mulpd (%rax), %xmm2 # CHECK-NEXT: 1 5 2.00 mulsd %xmm0, %xmm2 # CHECK-NEXT: 1 8 2.00 * mulsd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 orpd %xmm0, %xmm2 @@ -662,8 +662,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 74 70.00 * sqrtpd (%rax), %xmm2 # CHECK-NEXT: 1 35 35.00 sqrtsd %xmm0, %xmm2 # CHECK-NEXT: 1 38 35.00 * sqrtsd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * subpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 subpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * subpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * subsd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 ucomisd %xmm0, %xmm1 @@ -687,12 +687,12 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - 412.00 8.00 152.50 86.50 3.00 3.00 134.00 +# CHECK-NEXT: - 412.00 12.00 152.50 90.50 3.00 3.00 134.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - - - - 1.00 - - - addpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 addpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - addpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 addpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - addsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 addsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - andnpd %xmm0, %xmm2 @@ -794,8 +794,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 - - - movupd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - - 1.00 movupd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 movupd (%rax), %xmm2 -# CHECK-NEXT: - - 2.00 1.00 - - - - mulpd %xmm0, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulpd (%rax), %xmm2 +# CHECK-NEXT: - - 4.00 1.00 - - - - mulpd %xmm0, %xmm2 +# CHECK-NEXT: - - 4.00 1.00 - - - 1.00 mulpd (%rax), %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - - mulsd %xmm0, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 mulsd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 
- - - orpd %xmm0, %xmm2 @@ -946,8 +946,8 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - 70.00 - 1.00 - - - 1.00 sqrtpd (%rax), %xmm2 # CHECK-NEXT: - 35.00 - 1.00 - - - - sqrtsd %xmm0, %xmm2 # CHECK-NEXT: - 35.00 - 1.00 - - - 1.00 sqrtsd (%rax), %xmm2 -# CHECK-NEXT: - - - - 1.00 - - - subpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 subpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - subpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 subpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - subsd %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 subsd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - ucomisd %xmm0, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s index 484c353b07b..23949737b3c 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse3.s @@ -43,18 +43,18 @@ mwait # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * addsubpd (%rax), %xmm2 +# CHECK-NEXT: 1 4 2.00 addsubpd %xmm0, %xmm2 +# CHECK-NEXT: 1 7 2.00 * addsubpd (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * addsubps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * haddpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 haddps %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * haddps (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubpd %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * hsubpd (%rax), %xmm2 -# CHECK-NEXT: 1 3 1.00 hsubps %xmm0, %xmm2 -# CHECK-NEXT: 1 6 1.00 * hsubps (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 haddpd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * haddpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 haddps %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * haddps (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 hsubpd %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * hsubpd (%rax), %xmm2 +# CHECK-NEXT: 4 6 3.00 hsubps %xmm0, %xmm2 +# CHECK-NEXT: 4 9 3.00 * hsubps 
(%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 * lddqu (%rax), %xmm2 # CHECK-NEXT: 1 100 1.00 U monitor # CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2 @@ -77,22 +77,22 @@ mwait # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 16.00 12.00 - - 10.00 +# CHECK-NEXT: - - - 32.00 30.00 - - 10.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: -# CHECK-NEXT: - - - - 1.00 - - - addsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - - 1.00 - - 1.00 addsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - - 2.00 - - - addsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - - 2.00 - - 1.00 addsubpd (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - addsubps %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 addsubps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - haddpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddpd (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - haddps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 haddps (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - hsubpd %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubpd (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - - hsubps %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 1.00 - - 1.00 hsubps (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - haddpd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddpd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - haddps %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 haddps (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - hsubpd %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubpd (%rax), %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - - hsubps %xmm0, %xmm2 +# CHECK-NEXT: - - - 3.00 3.00 - - 1.00 hsubps (%rax), %xmm2 # CHECK-NEXT: - - - - - - - 1.00 lddqu (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - monitor # CHECK-NEXT: - - - 1.00 - - - - movddup %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s 
index ce74a7625e7..0dc83a0fb62 100644 --- a/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s @@ -159,10 +159,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 1 4 1.00 * blendpd $11, (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 blendps $11, %xmm0, %xmm2 # CHECK-NEXT: 1 4 1.00 * blendps $11, (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * blendvpd %xmm0, (%rax), %xmm2 -# CHECK-NEXT: 1 1 1.00 blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: 1 4 1.00 * blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 4 4.00 blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 4.00 * blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: 3 4 4.00 blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: 3 7 4.00 * blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 1 3 1.00 dpps $22, %xmm0, %xmm2 @@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] -# CHECK-NEXT: - - - 73.00 22.00 - - 54.00 +# CHECK-NEXT: - - - 85.00 22.00 - - 54.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions: @@ -272,10 +272,10 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 blendpd $11, (%rax), %xmm2 # CHECK-NEXT: - - - 1.00 - - - - blendps $11, %xmm0, %xmm2 # CHECK-NEXT: - - - 1.00 - - - 1.00 blendps $11, (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - blendvpd %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2 -# CHECK-NEXT: - - - 1.00 - - - - blendvps %xmm0, %xmm0, %xmm2 -# CHECK-NEXT: - - - 1.00 - - - 1.00 blendvps %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - 4.00 - - - - blendvpd %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvpd %xmm0, (%rax), %xmm2 +# CHECK-NEXT: - - - 4.00 - - - - blendvps %xmm0, %xmm0, %xmm2 +# CHECK-NEXT: - - - 4.00 - - - 1.00 blendvps %xmm0, (%rax), 
%xmm2 # CHECK-NEXT: - - - - 1.00 - - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - - 1.00 dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - - - 1.00 - - - dpps $22, %xmm0, %xmm2 |