diff options
-rw-r--r-- | llvm/lib/Target/X86/X86SchedSandyBridge.td | 10 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx2-schedule.ll | 4 | ||||
-rwxr-xr-x | llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll | 32 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s | 10 |
4 files changed, 24 insertions, 32 deletions
diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 8b457f91254..860d0db3f6f 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -288,7 +288,7 @@ defm : X86WriteRes<WriteVecStoreNTY, [SBPort23,SBPort4], 1, [1,1], 1>; defm : X86WriteRes<WriteVecMaskedStore, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1], 3>; defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>; -defm : X86WriteRes<WriteVecMoveX, [SBPort05], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveX, [SBPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>; defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>; @@ -526,14 +526,6 @@ def: InstRW<[SBWriteResGroup5], (instregex "MMX_PABS(B|D|W)rr", "MMX_PALIGNRrri", "MMX_PSIGN(B|D|W)rr")>; -def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr", - "MOVDQ(A|U)rr")>; // NOTE: Different port requirements to VEX equivalents - def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> { let Latency = 2; let NumMicroOps = 2; diff --git a/llvm/test/CodeGen/X86/avx2-schedule.ll b/llvm/test/CodeGen/X86/avx2-schedule.ll index a27a4118263..1f1dccbf5b7 100644 --- a/llvm/test/CodeGen/X86/avx2-schedule.ll +++ b/llvm/test/CodeGen/X86/avx2-schedule.ll @@ -3386,7 +3386,7 @@ define <4 x i32> @test_pmaskmovd(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaskmovd (%rdi), %xmm0, %xmm2 # sched: [8:1.00] ; GENERIC-NEXT: vpmaskmovd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaskmovd: @@ -3484,7 +3484,7 @@ define <2 x i64> @test_pmaskmovq(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpmaskmovq (%rdi), %xmm0, %xmm2 # sched: [8:1.00] ; GENERIC-NEXT: vpmaskmovq %xmm1, %xmm0, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm2, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaskmovq: diff --git a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll index 29fe6ca6eb4..7bd2368696c 100755 --- a/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffle-schedule.ll @@ -4536,7 +4536,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mask0: @@ -4573,7 +4573,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mask1: @@ -4610,7 +4610,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mask2: @@ -4660,7 +4660,7 @@ define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmb %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6] sched: [7:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_16xi8_perm_mask3: @@ -5658,7 +5658,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %v ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask0: @@ -5695,7 +5695,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask1: @@ -5732,7 +5732,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %v ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask2: @@ -5782,7 +5782,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask3: @@ -5819,7 +5819,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %v ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask4: @@ -5856,7 +5856,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask5: @@ -5906,7 +5906,7 @@ define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %v ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_high_mask6: @@ -5943,7 +5943,7 @@ define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %ve ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmw %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_8xi16_perm_low_mask7: @@ -7705,7 +7705,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mask0: @@ -7742,7 +7742,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mask1: @@ -7779,7 +7779,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mask2: @@ -7829,7 +7829,7 @@ define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, ; GENERIC: # %bb.0: ; GENERIC-NEXT: vptestnmd %xmm2, %xmm2, %k1 # sched: [1:0.33] ; GENERIC-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3] sched: [1:0.50] -; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: vmovdqa %xmm1, %xmm0 # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: test_masked_4xi32_perm_mask3: diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s index fc38345b44d..aaf45860cdf 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-avx1.s @@ -1260,13 +1260,13 @@ vzeroupper # CHECK-NEXT: 1 6 0.50 * vmovddup (%rax), %xmm2 # CHECK-NEXT: 1 1 1.00 vmovddup %ymm0, %ymm2 # CHECK-NEXT: 1 7 0.50 * vmovddup (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vmovdqa %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * vmovdqa %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * vmovdqa (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 vmovdqa %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 * vmovdqa %ymm0, (%rax) # CHECK-NEXT: 1 7 0.50 * vmovdqa (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vmovdqu %xmm0, %xmm2 +# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * vmovdqu %xmm0, (%rax) # CHECK-NEXT: 1 6 0.50 * vmovdqu (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 vmovdqu %ymm0, %ymm2 @@ -1714,7 +1714,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 572.00 225.50 307.00 39.00 354.50 177.50 177.50 +# CHECK-NEXT: - 572.00 225.17 307.67 39.00 354.17 177.50 177.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1960,13 +1960,13 @@ vzeroupper # CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %xmm2 # CHECK-NEXT: - - - - - 1.00 - - vmovddup %ymm0, %ymm2 # CHECK-NEXT: - - - - - - 0.50 0.50 vmovddup (%rax), %ymm2 -# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa %xmm0, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %xmm0, (%rax) # CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %xmm2 # CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqa %ymm0, %ymm2 # CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqa %ymm0, (%rax) # CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqa (%rax), %ymm2 -# CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu %xmm0, %xmm2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vmovdqu %xmm0, %xmm2 # CHECK-NEXT: - - - - 1.00 - 0.50 0.50 vmovdqu %xmm0, (%rax) # CHECK-NEXT: - - - - - - 0.50 0.50 vmovdqu (%rax), %xmm2 # CHECK-NEXT: - - 0.50 - - 0.50 - - vmovdqu %ymm0, %ymm2 |