From 5076b98fb91b5befd592cbcf220b0de28855fc59 Mon Sep 17 00:00:00 2001 From: Andrea Di Biagio Date: Wed, 28 Mar 2018 12:12:04 +0000 Subject: [X86][BtVer2] Fix the number of micro opcodes for AES[ENC|DEC] and other YMM instructions. Similar to r328694. The number of micro opcodes should be 2 for those instructions. This was found when testing AVX code for BtVer2 using llvm-mca. llvm-svn: 328698 --- llvm/lib/Target/X86/X86ScheduleBtVer2.td | 5 ++- .../tools/llvm-mca/X86/BtVer2/resources-avx1.s | 44 +++++++++++----------- 2 files changed, 26 insertions(+), 23 deletions(-) (limited to 'llvm') diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 1b4d1ad73fa..9e3f5449aba 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -372,7 +372,7 @@ def : WriteRes { let Latency = 3; } defm : JWriteResFpuPair; defm : JWriteResFpuPair; -defm : JWriteResFpuPair; +defm : JWriteResFpuPair; //////////////////////////////////////////////////////////////////////////////// // Horizontal add/sub instructions. @@ -748,18 +748,21 @@ def : InstRW<[JWriteShuffleYLd, ReadAfterLd], (instrs VBLENDPDYrmi, VBLENDPSYrmi def JWriteVBROADCASTYLd: SchedWriteRes<[JLAGU, JFPU01, JFPX]> { let Latency = 6; let ResourceCycles = [1, 2, 4]; + let NumMicroOps = 2; } def : InstRW<[JWriteVBROADCASTYLd, ReadAfterLd], (instrs VBROADCASTSDYrm, VBROADCASTSSYrm)>; def JWriteFPAY22: SchedWriteRes<[JFPU0, JFPA]> { let Latency = 2; let ResourceCycles = [2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteFPAY22], (instregex "VCMPP(S|D)Yrri", "VM(AX|IN)P(D|S)Yrr")>; def JWriteFPAY22Ld: SchedWriteRes<[JLAGU, JFPU0, JFPA]> { let Latency = 7; let ResourceCycles = [2, 2, 2]; + let NumMicroOps = 2; } def : InstRW<[JWriteFPAY22Ld, ReadAfterLd], (instregex "VCMPP(S|D)Yrmi", "VM(AX|IN)P(D|S)Yrm")>; diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index d381fb3dd3a..f0a5fdf36d5 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1038,14 +1038,14 @@ vzeroupper # CHECK-NEXT: 1 8 1.00 * vaddsubps (%rax), %xmm1, %xmm2 # CHECK-NEXT: 2 3 2.00 vaddsubps %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 2 8 2.00 * vaddsubps (%rax), %ymm1, %ymm2 -# CHECK-NEXT: 1 3 1.00 vaesdec %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vaesdec (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vaesenc %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vaesenc (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 1 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 1.00 vaesdec %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vaesdec (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 1.00 vaesdeclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vaesdeclast (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 1.00 vaesenc %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vaesenc (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 2 3 1.00 vaesenclast %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 8 1.00 * vaesenclast (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vaesimc %xmm0, %xmm2 # CHECK-NEXT: 1 7 1.00 * vaesimc (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 vaeskeygenassist $22, %xmm0, %xmm2 @@ -1083,17 +1083,17 @@ vzeroupper # CHECK-NEXT: 6 3 3.00 vblendvps %ymm3, %ymm0, %ymm1, %ymm2 # CHECK-NEXT: 6 8 3.00 * vblendvps %ymm3, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 6 1.00 * vbroadcastf128 (%rax), %ymm2 -# CHECK-NEXT: 1 6 2.00 * vbroadcastsd (%rax), %ymm2 +# CHECK-NEXT: 2 6 2.00 * vbroadcastsd (%rax), %ymm2 # CHECK-NEXT: 1 6 1.00 * vbroadcastss (%rax), %xmm2 -# CHECK-NEXT: 1 6 2.00 * vbroadcastss (%rax), %ymm2 +# CHECK-NEXT: 2 6 2.00 * vbroadcastss (%rax), %ymm2 # CHECK-NEXT: 1 2 1.00 vcmppd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vcmppd $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vcmppd $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vcmppd $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vcmpps $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vcmpps $0, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vcmpps $0, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vcmpps $0, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vcmpsd $0, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vcmpsd $0, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vcmpss $0, %xmm0, %xmm1, %xmm2 @@ -1218,24 +1218,24 @@ vzeroupper # CHECK-NEXT: 2 6 2.00 * * vmaskmovps %ymm0, %ymm1, (%rax) # CHECK-NEXT: 1 2 1.00 vmaxpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vmaxpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vmaxpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vmaxps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vmaxps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vmaxps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vmaxps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vmaxps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vmaxsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vmaxss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vmaxss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vminpd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vminpd (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vminpd %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vminpd (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vminpd %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vminpd (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vminps %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vminps (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 2 2.00 vminps %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 1 7 2.00 * vminps (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 2 2 2.00 vminps %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 2 7 2.00 * vminps (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 2 1.00 vminsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 7 1.00 * vminsd (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 vminss %xmm0, %xmm1, %xmm2 -- cgit v1.2.3