summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-08-31 08:30:47 +0000
committer Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> 2018-08-31 08:30:47 +0000
commit b998eae2f2327b6df9ea71e8fb475b84cc657b57 (patch)
tree 3470520ada860af4639e9c43eac5cf8e45542ad7
parent c38b3f03084c2c664947217c4441d335a31e4d9a (diff)
download bcm5719-llvm-b998eae2f2327b6df9ea71e8fb475b84cc657b57.tar.gz
bcm5719-llvm-b998eae2f2327b6df9ea71e8fb475b84cc657b57.zip
[X86][BtVer2] Fix WriteFShuffle256 schedule write info.
This patch fixes the number of micro opcodes and the processor resource cycles for the following AVX instructions:
 - vinsertf128rr/rm
 - vperm2f128rr/rm
 - vbroadcastf128

Tests have been regenerated using the usual scripts in the llvm/utils directory.

Differential Revision: https://reviews.llvm.org/D51492
llvm-svn: 341185
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBtVer2.td 8
-rw-r--r-- llvm/test/CodeGen/X86/avx-schedule.ll 12
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s 22
3 files changed, 23 insertions, 19 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index e80b09930e1..d57db9886c8 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -370,7 +370,7 @@ defm : X86WriteResPairUnsupported<WriteFBlendZ>;
defm : JWriteResFpuPair<WriteFVarBlend, [JFPU01, JFPX], 2, [1, 4], 3>;
defm : JWriteResYMMPair<WriteFVarBlendY, [JFPU01, JFPX], 3, [2, 6], 6>;
defm : X86WriteResPairUnsupported<WriteFVarBlendZ>;
-defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1>;
+defm : JWriteResFpuPair<WriteFShuffle256, [JFPU01, JFPX], 1, [2, 2], 2>;
defm : X86WriteResPairUnsupported<WriteFVarShuffle256>;
////////////////////////////////////////////////////////////////////////////////
@@ -562,13 +562,17 @@ def : InstRW<[JWriteINSERTQ], (instrs INSERTQ, INSERTQI)>;
// AVX instructions.
////////////////////////////////////////////////////////////////////////////////
+def JWriteVecExtractF128: SchedWriteRes<[JFPU01, JFPX]>;
+def : InstRW<[JWriteVecExtractF128], (instrs VEXTRACTF128rr)>;
+
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm,
- VBROADCASTSSYrm)>;
+ VBROADCASTSSYrm,
+ VBROADCASTF128)>;
def JWriteJVZEROALL: SchedWriteRes<[]> {
let Latency = 90;
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll
index 7ec84256a9f..ec811f9dd19 100644
--- a/llvm/test/CodeGen/X86/avx-schedule.ll
+++ b/llvm/test/CodeGen/X86/avx-schedule.ll
@@ -768,7 +768,7 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) {
;
; BTVER2-LABEL: test_broadcastf128:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:1.00]
+; BTVER2-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [6:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_broadcastf128:
@@ -1225,7 +1225,7 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
; BTVER2-NEXT: vcvtpd2dq %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtpd2dq:
@@ -1289,7 +1289,7 @@ define <8 x i32> @test_cvttpd2dq(<4 x double> %a0, <4 x double> *%a1) {
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:2.00]
; BTVER2-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvttpd2dq:
@@ -1352,7 +1352,7 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) {
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:2.00]
; BTVER2-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:2.00]
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:0.50]
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_cvtpd2ps:
@@ -1984,7 +1984,7 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float
;
; BTVER2-LABEL: test_insertf128:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:0.50]
+; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00]
; BTVER2-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [6:1.00]
; BTVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
@@ -3526,7 +3526,7 @@ define <4 x double> @test_perm2f128(<4 x double> %a0, <4 x double> %a1, <4 x dou
;
; BTVER2-LABEL: test_perm2f128:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:0.50]
+; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm1 = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; BTVER2-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
index c475e1692fb..63552990a2a 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
@@ -1082,7 +1082,7 @@ vzeroupper
# CHECK-NEXT: 3 7 2.00 * vblendvps %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastf128 (%rax), %ymm2
# CHECK-NEXT: 2 6 2.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2
# CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm2
@@ -1200,8 +1200,8 @@ vzeroupper
# CHECK-NEXT: 1 8 1.00 * vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vhsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 1 0.50 vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 6 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 1 1.00 vinsertf128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 6 1.00 * vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 1.00 * vlddqu (%rax), %xmm2
@@ -1409,8 +1409,8 @@ vzeroupper
# CHECK-NEXT: 1 6 1.00 * vpcmpgtq (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpgtw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpcmpgtw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 1 0.50 vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 6 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 1 1.00 vperm2f128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 6 1.00 * vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 1 0.50 vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: 1 6 1.00 * vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: 3 2 2.00 vpermilpd %xmm0, %xmm1, %xmm2
@@ -1720,7 +1720,7 @@ vzeroupper
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 48.00 - - 353.50 911.50 402.00 422.00 382.00 - 43.00 132.00 119.50 119.50 38.00
+# CHECK-NEXT: 48.00 - - 357.00 915.00 404.50 424.50 382.00 - 43.00 132.00 119.50 119.50 38.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -1788,7 +1788,7 @@ vzeroupper
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 1.00 - - - - - - vblendvps %xmm3, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 - - - - - - - vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 3.00 3.00 1.00 1.00 2.00 - - - - - - vblendvps %ymm3, (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vbroadcastf128 (%rax), %ymm2
+# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastf128 (%rax), %ymm2
# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vbroadcastss (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 2.00 1.00 1.00 1.00 - - - - - - vbroadcastss (%rax), %ymm2
@@ -1906,8 +1906,8 @@ vzeroupper
# CHECK-NEXT: - - - 1.00 - 1.00 - 1.00 - - - - - - vhsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - 2.00 - 2.00 - - - - - - - - vhsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - - 2.00 - 2.00 - 2.00 - - - - - - vhsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vinsertf128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vinsertf128 $1, %xmm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vinsertf128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vinsertps $1, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vinsertps $1, (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vlddqu (%rax), %xmm2
@@ -2115,8 +2115,8 @@ vzeroupper
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpcmpgtq (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vpcmpgtw %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vpcmpgtw (%rax), %xmm1, %xmm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vperm2f128 $1, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vperm2f128 $1, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 1.00 - - - - - - vperm2f128 $1, (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vpermilpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vpermilpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 2.00 2.00 0.50 0.50 - - - - - - - vpermilpd %xmm0, %xmm1, %xmm2
OpenPOWER on IntegriCloud