summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>, 2018-03-28 12:12:04 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>, 2018-03-28 12:12:04 +0000
commit: 5076b98fb91b5befd592cbcf220b0de28855fc59 (patch)
tree: 03da518941b567d2703eeec915741ea3e9d39017
parent: 4e7ad0805e5d9b947ec504a5c43c2e4092c8cfc9 (diff)
download: bcm5719-llvm-5076b98fb91b5befd592cbcf220b0de28855fc59.tar.gz
download: bcm5719-llvm-5076b98fb91b5befd592cbcf220b0de28855fc59.zip
[X86][BtVer2] Fix the number of micro opcodes for AES[ENC|DEC] and other YMM instructions.
Similar to r328694. The number of micro opcodes should be 2 for those instructions. This was found when testing AVX code for BtVer2 using llvm-mca.

llvm-svn: 328698
-rw-r--r-- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5
-rw-r--r-- llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s | 44
2 files changed, 26 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 1b4d1ad73fa..9e3f5449aba 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -372,7 +372,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1], 2>;
////////////////////////////////////////////////////////////////////////////////
// Horizontal add/sub instructions.
@@ -748,18 +748,21 @@ def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi
def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
let Latency = 6;
let ResourceCycles = [1, 2, 4];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
def JWriteFPAY22: SchedWriteRes<[JFPU0, JFPA]> {
let Latency = 2;
let ResourceCycles = [2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>;
def JWriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
let Latency = 7;
let ResourceCycles = [2, 2, 2];
+ let NumMicroOps = 2;
}
def : InstRW<[JWriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
index d381fb3dd3a..f0a5fdf36d5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
@@ -1038,14 +1038,14 @@ vzeroupper
# CHECK-NEXT: 1 8 1.00 * vaddsubps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 2 3 2.00 vaddsubps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 8 2.00 * vaddsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vaesimc %xmm0, %xmm2
# CHECK-NEXT: 1 7 1.00 * vaesimc (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 vaeskeygenassist $22, %xmm0, %xmm2
@@ -1083,17 +1083,17 @@ vzeroupper
# CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastsd (%rax), %ymm2
# CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm2
# CHECK-NEXT: 1 2 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2
@@ -1218,24 +1218,24 @@ vzeroupper
# CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
# CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vmaxsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vmaxss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vmaxss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminpd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminpd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 2 1.00 vminsd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 7 1.00 * vminsd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 2 1.00 vminss %xmm0, %xmm1, %xmm2
OpenPOWER on IntegriCloud