diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-03-28 12:12:04 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2018-03-28 12:12:04 +0000 |
commit | 5076b98fb91b5befd592cbcf220b0de28855fc59 (patch) | |
tree | 03da518941b567d2703eeec915741ea3e9d39017 | |
parent | 4e7ad0805e5d9b947ec504a5c43c2e4092c8cfc9 (diff) | |
download | bcm5719-llvm-5076b98fb91b5befd592cbcf220b0de28855fc59.tar.gz bcm5719-llvm-5076b98fb91b5befd592cbcf220b0de28855fc59.zip |
[X86][BtVer2] Fix the number of micro opcodes for AES[ENC|DEC] and other YMM instructions.
Similar to r328694. The number of micro opcodes should be 2 for those
instructions.
This was found when testing AVX code for BtVer2 using llvm-mca.
llvm-svn: 328698
-rw-r--r-- | llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5 |
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s | 44 |
2 files changed, 26 insertions, 23 deletions
diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
index 1b4d1ad73fa..9e3f5449aba 100644
--- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td
+++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td
@@ -372,7 +372,7 @@ def : WriteRes<WriteMMXMOVMSK, [JFPU0, JFPA, JALU0]> { let Latency = 3; }
 defm : JWriteResFpuPair<WriteAESIMC, [JFPU0, JVIMUL], 2>;
 defm : JWriteResFpuPair<WriteAESKeyGen, [JFPU0, JVIMUL], 2>;
-defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3>;
+defm : JWriteResFpuPair<WriteAESDecEnc, [JFPU0, JVIMUL], 3, [1], 2>;
 ////////////////////////////////////////////////////////////////////////////////
 // Horizontal add/sub instructions.
@@ -748,18 +748,21 @@ def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi
 def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> {
   let Latency = 6;
   let ResourceCycles = [1, 2, 4];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>;
 def JWriteFPAY22: SchedWriteRes<[JFPU0, JFPA]> {
   let Latency = 2;
   let ResourceCycles = [2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>;
 def JWriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0, JFPA]> {
   let Latency = 7;
   let ResourceCycles = [2, 2, 2];
+  let NumMicroOps = 2;
 }
 def : InstRW<[JWriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>;
diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
index d381fb3dd3a..f0a5fdf36d5 100644
--- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
+++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s
@@ -1038,14 +1038,14 @@ vzeroupper
 # CHECK-NEXT: 1 8 1.00 * vaddsubps (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 2 3 2.00 vaddsubps %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 2 8 2.00 * vaddsubps (%rax), %ymm1, %ymm2
-# CHECK-NEXT: 1 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
-# CHECK-NEXT: 1 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdec %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdec (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenc %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenc (%rax), %xmm1, %xmm2
+# CHECK-NEXT: 2 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2
+# CHECK-NEXT: 2 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 1 2 1.00 vaesimc %xmm0, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vaesimc (%rax), %xmm2
 # CHECK-NEXT: 1 2 1.00 vaeskeygenassist $22, %xmm0, %xmm2
@@ -1083,17 +1083,17 @@ vzeroupper
 # CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2
 # CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastsd (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastsd (%rax), %ymm2
 # CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2
-# CHECK-NEXT: 1 6 2.00 * vbroadcastss (%rax), %ymm2
+# CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm2
 # CHECK-NEXT: 1 2 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 1 2 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2
@@ -1218,24 +1218,24 @@ vzeroupper
 # CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax)
 # CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vmaxps %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vmaxps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vmaxps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vmaxps (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vmaxsd %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vmaxsd (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 1 2 1.00 vmaxss %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vmaxss (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 1 2 1.00 vminpd %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vminpd (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminpd %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminpd (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminpd %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminpd (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vminps %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vminps (%rax), %xmm1, %xmm2
-# CHECK-NEXT: 1 2 2.00 vminps %ymm0, %ymm1, %ymm2
-# CHECK-NEXT: 1 7 2.00 * vminps (%rax), %ymm1, %ymm2
+# CHECK-NEXT: 2 2 2.00 vminps %ymm0, %ymm1, %ymm2
+# CHECK-NEXT: 2 7 2.00 * vminps (%rax), %ymm1, %ymm2
 # CHECK-NEXT: 1 2 1.00 vminsd %xmm0, %xmm1, %xmm2
 # CHECK-NEXT: 1 7 1.00 * vminsd (%rax), %xmm1, %xmm2
 # CHECK-NEXT: 1 2 1.00 vminss %xmm0, %xmm1, %xmm2