diff options
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-schedule.ll | 408 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp128-i128.ll | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/gather-addresses.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/recip-fastmath.ll | 46 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/recip-fastmath2.ll | 70 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse-schedule.ll | 248 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 598 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse3-schedule.ll | 48 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-schedule.ll | 222 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse42-schedule.ll | 38 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/ssse3-schedule.ll | 74 |
12 files changed, 896 insertions, 896 deletions
diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index 47e95fe31bd..500342730f1 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -10,8 +10,8 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: @@ -40,8 +40,8 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: @@ -70,8 +70,8 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: @@ -101,8 +101,8 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: @@ -131,10 +131,10 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: @@ -172,10 +172,10 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: @@ -213,10 +213,10 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: @@ -252,10 +252,10 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: @@ -291,10 +291,10 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: @@ -326,9 +326,9 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: @@ -356,9 +356,9 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: @@ -387,9 +387,9 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: @@ -418,8 +418,8 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; SANDY-LABEL: test_broadcastf128: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastf128: ; HASWELL: # BB#0: @@ -443,8 +443,8 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; SANDY-LABEL: test_broadcastsd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastsd_ymm: ; HASWELL: # BB#0: @@ -469,8 +469,8 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { define <4 x float> @test_broadcastss(float *%a0) { ; SANDY-LABEL: test_broadcastss: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastss: ; HASWELL: # BB#0: @@ -496,7 +496,7 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { ; SANDY-LABEL: test_broadcastss_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_broadcastss_ymm: ; HASWELL: # BB#0: @@ -522,9 +522,9 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: @@ -560,9 +560,9 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: @@ -598,9 +598,9 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: @@ -632,12 +632,12 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00] -; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: @@ -669,10 +669,10 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: @@ -704,10 +704,10 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: @@ -741,8 +741,8 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: @@ -774,9 +774,9 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] +; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: @@ -804,9 +804,9 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: @@ -834,9 +834,9 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: @@ -866,9 +866,9 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; SANDY-LABEL: test_extractf128: ; SANDY: # BB#0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_extractf128: ; HASWELL: # BB#0: @@ -900,7 +900,7 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY: # BB#0: ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: @@ -929,9 +929,9 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: @@ -960,9 +960,9 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: @@ -991,9 +991,9 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: @@ -1023,9 +1023,9 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SANDY-LABEL: test_insertf128: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_insertf128: ; HASWELL: # BB#0: @@ -1059,8 +1059,8 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float define <32 x i8> @test_lddqu(i8* %a0) { ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: @@ -1084,10 +1084,10 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; SANDY-LABEL: test_maskmovpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovpd: ; HASWELL: # BB#0: @@ -1119,10 +1119,10 @@ declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { ; SANDY-LABEL: test_maskmovpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovpd_ymm: ; HASWELL: # BB#0: @@ -1154,10 +1154,10 @@ declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwi define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { ; SANDY-LABEL: test_maskmovps: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] -; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovps: ; HASWELL: # BB#0: @@ -1189,10 +1189,10 @@ declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { ; SANDY-LABEL: test_maskmovps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] ; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovps_ymm: ; HASWELL: # BB#0: @@ -1225,8 +1225,8 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: @@ -1256,8 +1256,8 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: @@ -1288,7 +1288,7 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: @@ -1319,7 +1319,7 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY: # BB#0: ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: @@ -1348,10 +1348,10 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: @@ -1382,10 +1382,10 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: @@ -1417,9 +1417,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: @@ -1451,9 +1451,9 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { define i32 @test_movmskpd(<4 x double> %a0) { ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: @@ -1479,9 +1479,9 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone define i32 @test_movmskps(<8 x float> %a0) { ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: @@ -1508,8 +1508,8 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: @@ -1537,8 +1537,8 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: @@ -1566,9 +1566,9 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: @@ -1601,9 +1601,9 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: @@ -1635,12 +1635,12 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: @@ -1671,12 +1671,12 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: @@ -1708,8 +1708,8 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: @@ -1738,8 +1738,8 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: @@ -1767,10 +1767,10 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: orpd: ; HASWELL: # BB#0: @@ -1806,10 +1806,10 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: @@ -1846,9 +1846,9 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_permilpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilpd: ; HASWELL: # BB#0: @@ -1880,10 +1880,10 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_permilpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilpd_ymm: ; HASWELL: # BB#0: @@ -1916,9 +1916,9 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_permilps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilps: ; HASWELL: # BB#0: @@ -1950,10 +1950,10 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_permilps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilps_ymm: ; HASWELL: # BB#0: @@ -1986,8 +1986,8 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; SANDY-LABEL: test_permilvarpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarpd: ; HASWELL: # BB#0: @@ -2018,7 +2018,7 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarpd_ymm: ; HASWELL: # BB#0: @@ -2048,8 +2048,8 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; SANDY-LABEL: test_permilvarps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarps: ; HASWELL: # BB#0: @@ -2080,7 +2080,7 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_permilvarps_ymm: ; HASWELL: # BB#0: @@ -2112,7 +2112,7 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: @@ -2148,7 +2148,7 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: @@ -2184,7 +2184,7 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: @@ -2217,10 +2217,10 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] +; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: @@ -2254,9 +2254,9 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: @@ -2289,8 +2289,8 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: @@ -2318,10 +2318,10 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] +; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: @@ -2354,10 +2354,10 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] +; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: @@ -2391,8 +2391,8 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: @@ -2421,8 +2421,8 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: @@ -2451,11 +2451,11 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_testpd: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testpd: ; HASWELL: # BB#0: @@ -2495,12 +2495,12 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; SANDY-LABEL: test_testpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testpd_ymm: ; HASWELL: # BB#0: @@ -2542,11 +2542,11 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_testps: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testps: ; HASWELL: # BB#0: @@ -2586,12 +2586,12 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; SANDY-LABEL: test_testps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_testps_ymm: ; HASWELL: # BB#0: @@ -2635,7 +2635,7 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: @@ -2669,7 +2669,7 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: @@ -2698,9 +2698,9 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: @@ -2733,8 +2733,8 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: @@ -2762,10 +2762,10 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: @@ -2801,10 +2801,10 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: @@ -2841,7 +2841,7 @@ define void @test_zeroall() { ; SANDY-LABEL: test_zeroall: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroall # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # BB#0: @@ -2866,7 +2866,7 @@ define void @test_zeroupper() { ; SANDY-LABEL: test_zeroupper: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll index 9d0900f3b42..4d0b5ccc16b 100644 --- a/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll +++ b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll @@ -15,18 +15,18 @@ define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* noca ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: paddd (%ecx), %xmm0 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: paddd (%edx), %xmm0 -; CHECK-NEXT: movdqa %xmm0, (%edx) -; CHECK-NEXT: movl (%edx), %esi -; CHECK-NEXT: movl 4(%edx), %edi -; CHECK-NEXT: shll $4, %ecx -; CHECK-NEXT: movl 8(%edx), %ebx -; CHECK-NEXT: movl 12(%edx), %edx -; CHECK-NEXT: movl %esi, 12(%eax,%ecx) -; CHECK-NEXT: movl %edi, (%eax,%ecx) -; CHECK-NEXT: movl %ebx, 8(%eax,%ecx) -; CHECK-NEXT: movl %edx, 4(%eax,%ecx) +; CHECK-NEXT: movdqa %xmm0, (%ecx) +; CHECK-NEXT: movl (%ecx), %esi +; CHECK-NEXT: movl 4(%ecx), %edi +; CHECK-NEXT: shll $4, %edx +; CHECK-NEXT: movl 8(%ecx), %ebx +; CHECK-NEXT: movl 12(%ecx), %ecx +; CHECK-NEXT: movl %esi, 12(%eax,%edx) +; CHECK-NEXT: movl %edi, (%eax,%edx) +; CHECK-NEXT: movl %ebx, 8(%eax,%edx) +; CHECK-NEXT: movl %ecx, 4(%eax,%edx) ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebx diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll index 6c6bc8bdc1d..98082ec611d 100644 --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: jmp foo # TAILCALL diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll index c3109673468..e09ad3e4e0b 100644 --- a/llvm/test/CodeGen/X86/gather-addresses.ll +++ b/llvm/test/CodeGen/X86/gather-addresses.ll @@ -16,10 +16,10 @@ ; LIN: sarq $32, %r[[REG2]] ; LIN: movslq %e[[REG4]], %r[[REG3:.+]] ; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0 -; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0 -; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 -; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 +; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 +; LIN: movq %rdi, %xmm1 +; LIN: movq %r[[REG3]], %xmm0 ; WIN: movdqa (%rdx), %xmm0 ; WIN: pand (%r8), %xmm0 @@ -29,10 +29,10 @@ ; WIN: sarq $32, %r[[REG2]] ; WIN: movslq %e[[REG4]], %r[[REG3:.+]] ; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0 -; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0 -; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 -; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 +; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 +; WIN: movdqa (%r[[REG2]]), %xmm0 +; WIN: movq %r[[REG2]], %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>, <4 x i32>* %i diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll index cd4b02ca833..e0fdce5e444 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath.ll @@ -45,9 +45,9 @@ define float @f32_no_estimate(float %x) #0 { ; ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: @@ -113,11 +113,11 @@ define float @f32_one_step(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # BB#0: @@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -215,7 +215,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # BB#0: @@ -284,9 +284,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: @@ -350,13 +350,13 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # BB#0: @@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -463,7 +463,7 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # BB#0: @@ -546,9 +546,9 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: @@ -621,11 +621,11 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # BB#0: @@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -745,7 +745,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_two_step: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll index c3c8fa3016a..4e627662170 100644 --- a/llvm/test/CodeGen/X86/recip-fastmath2.ll +++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll @@ -39,8 +39,8 @@ define float @f32_no_step_2(float %x) #3 { ; SANDY-LABEL: f32_no_step_2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_no_step_2: ; HASWELL: # BB#0: @@ -110,12 +110,12 @@ define float @f32_one_step_2(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step_2: ; HASWELL: # BB#0: @@ -198,13 +198,13 @@ define float @f32_one_step_2_divs(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -313,8 +313,8 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: f32_two_step_2: ; HASWELL: # BB#0: @@ -403,14 +403,14 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step2: ; HASWELL: # BB#0: @@ -501,15 +501,15 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -629,8 +629,8 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # BB#0: @@ -741,12 +741,12 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step2: ; HASWELL: # BB#0: @@ -848,13 +848,13 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL: # BB#0: @@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -988,8 +988,8 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_two_step2: ; HASWELL: # BB#0: @@ -1070,7 +1070,7 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_step: ; HASWELL: # BB#0: @@ -1125,8 +1125,8 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: v8f32_no_step2: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 52e6b61aedf..c41acd43b3a 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -31,8 +31,8 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: @@ -73,8 +73,8 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_addss: ; SANDY: # BB#0: ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addss: ; HASWELL: # BB#0: @@ -122,9 +122,9 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: @@ -176,9 +176,9 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: @@ -228,9 +228,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: @@ -277,7 +277,7 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpss: ; HASWELL: # BB#0: @@ -347,16 +347,16 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_comiss: ; SANDY: # BB#0: ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_comiss: ; HASWELL: # BB#0: @@ -417,10 +417,10 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ss: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2ss: ; HASWELL: # BB#0: @@ -466,10 +466,10 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ssq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2ssq: ; HASWELL: # BB#0: @@ -515,10 +515,10 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [7:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2si: ; HASWELL: # BB#0: @@ -567,10 +567,10 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2siq: ; HASWELL: # BB#0: @@ -619,10 +619,10 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttss2si: ; HASWELL: # BB#0: @@ -668,10 +668,10 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttss2siq: ; HASWELL: # BB#0: @@ -714,9 +714,9 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: @@ -756,9 +756,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; ; SANDY-LABEL: test_divss: ; SANDY: # BB#0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divss: ; HASWELL: # BB#0: @@ -799,8 +799,8 @@ define void @test_ldmxcsr(i32 %a0) { ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # BB#0: ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ldmxcsr: ; HASWELL: # BB#0: @@ -843,8 +843,8 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: @@ -886,8 +886,8 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxss: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxss: ; HASWELL: # BB#0: @@ -929,8 +929,8 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minps: ; SANDY: # BB#0: ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: @@ -972,8 +972,8 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minss: ; SANDY: # BB#0: ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minss: ; HASWELL: # BB#0: @@ -1017,10 +1017,10 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: @@ -1068,7 +1068,7 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; SANDY-LABEL: test_movhlps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhlps: ; HASWELL: # BB#0: @@ -1111,10 +1111,10 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhps: ; HASWELL: # BB#0: @@ -1164,7 +1164,7 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # BB#0: @@ -1206,10 +1206,10 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlps: ; HASWELL: # BB#0: @@ -1254,8 +1254,8 @@ define i32 @test_movmskps(<4 x float> %a0) { ; ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: @@ -1295,8 +1295,8 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: @@ -1335,10 +1335,10 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; ; SANDY-LABEL: test_movss_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movss_mem: ; HASWELL: # BB#0: @@ -1383,8 +1383,8 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; ; SANDY-LABEL: test_movss_reg: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movss_reg: ; HASWELL: # BB#0: @@ -1423,10 +1423,10 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: @@ -1469,8 +1469,8 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: @@ -1511,8 +1511,8 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_mulss: ; SANDY: # BB#0: ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulss: ; HASWELL: # BB#0: @@ -1560,9 +1560,9 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: @@ -1609,8 +1609,8 @@ define void @test_prefetchnta(i8* %a0) { ; ; SANDY-LABEL: test_prefetchnta: ; SANDY: # BB#0: -; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_prefetchnta: ; HASWELL: # BB#0: @@ -1652,10 +1652,10 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_rcpps: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00] +; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: @@ -1708,10 +1708,10 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SANDY-LABEL: test_rcpss: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rcpss: ; HASWELL: # BB#0: @@ -1765,9 +1765,9 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: @@ -1819,11 +1819,11 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; ; SANDY-LABEL: test_rsqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_rsqrtss: ; HASWELL: # BB#0: @@ -1875,7 +1875,7 @@ define void @test_sfence() { ; SANDY-LABEL: test_sfence: ; SANDY: # BB#0: ; SANDY-NEXT: sfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sfence: ; HASWELL: # BB#0: @@ -1917,8 +1917,8 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: @@ -1962,10 +1962,10 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: @@ -2017,11 +2017,11 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # BB#0: @@ -2067,9 +2067,9 @@ define i32 @test_stmxcsr() { ; ; SANDY-LABEL: test_stmxcsr: ; SANDY: # BB#0: -; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_stmxcsr: ; HASWELL: # BB#0: @@ -2112,8 +2112,8 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: @@ -2154,8 +2154,8 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_subss: ; SANDY: # BB#0: ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subss: ; HASWELL: # BB#0: @@ -2220,16 +2220,16 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_ucomiss: ; SANDY: # BB#0: ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ucomiss: ; HASWELL: # BB#0: @@ -2292,8 +2292,8 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpckhps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: @@ -2338,8 +2338,8 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: @@ -2387,9 +2387,9 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 14c155c8c6c..3c36b213813 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -31,8 +31,8 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: @@ -73,8 +73,8 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_addsd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsd: ; HASWELL: # BB#0: @@ -117,10 +117,10 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: @@ -170,10 +170,10 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: @@ -226,9 +226,9 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: @@ -275,7 +275,7 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # BB#0: @@ -345,16 +345,16 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_comisd: ; SANDY: # BB#0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_comisd: ; HASWELL: # BB#0: @@ -416,9 +416,9 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: @@ -467,10 +467,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: @@ -517,10 +517,10 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: @@ -568,10 +568,10 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: @@ -620,9 +620,9 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: @@ -670,10 +670,10 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # BB#0: @@ -724,7 +724,7 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # BB#0: @@ -773,10 +773,10 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # BB#0: @@ -830,10 +830,10 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # BB#0: @@ -882,9 +882,9 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # BB#0: @@ -931,9 +931,9 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] +; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # BB#0: @@ -985,11 +985,11 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00] +; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # BB#0: @@ -1038,10 +1038,10 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # BB#0: @@ -1091,9 +1091,9 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # BB#0: @@ -1139,10 +1139,10 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] ; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # BB#0: @@ -1188,10 +1188,10 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [7:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # BB#0: @@ -1234,9 +1234,9 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: @@ -1276,9 +1276,9 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; ; SANDY-LABEL: test_divsd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_divsd: ; HASWELL: # BB#0: @@ -1322,7 +1322,7 @@ define void @test_lfence() { ; SANDY-LABEL: test_lfence: ; SANDY: # BB#0: ; SANDY-NEXT: lfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lfence: ; HASWELL: # BB#0: @@ -1363,7 +1363,7 @@ define void @test_mfence() { ; SANDY-LABEL: test_mfence: ; SANDY: # BB#0: ; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mfence: ; HASWELL: # BB#0: @@ -1402,7 +1402,7 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; SANDY-LABEL: test_maskmovdqu: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # BB#0: @@ -1440,8 +1440,8 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: @@ -1483,8 +1483,8 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_maxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # BB#0: @@ -1526,8 +1526,8 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minpd: ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: @@ -1569,8 +1569,8 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minsd: ; SANDY: # BB#0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_minsd: ; HASWELL: # BB#0: @@ -1614,10 +1614,10 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: @@ -1662,10 +1662,10 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # BB#0: @@ -1710,10 +1710,10 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqu: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # BB#0: @@ -1768,12 +1768,12 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SANDY-LABEL: test_movd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movd: ; HASWELL: # BB#0: @@ -1838,13 +1838,13 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; ; SANDY-LABEL: test_movd_64: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [1:0.33] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] +; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # BB#0: @@ -1900,10 +1900,10 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # BB#0: @@ -1951,10 +1951,10 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # BB#0: @@ -1998,8 +1998,8 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: @@ -2039,8 +2039,8 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: @@ -2080,8 +2080,8 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: @@ -2123,10 +2123,10 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; ; SANDY-LABEL: test_movq_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # BB#0: @@ -2174,7 +2174,7 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # BB#0: @@ -2216,10 +2216,10 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] ; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # BB#0: @@ -2266,7 +2266,7 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SANDY-LABEL: test_movsd_reg: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # BB#0: @@ -2305,10 +2305,10 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: @@ -2351,8 +2351,8 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: @@ -2393,8 +2393,8 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_mulsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # BB#0: @@ -2437,10 +2437,10 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_orpd: ; HASWELL: # BB#0: @@ -2496,8 +2496,8 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packssdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # BB#0: @@ -2548,8 +2548,8 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packsswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # BB#0: @@ -2600,8 +2600,8 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packuswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # BB#0: @@ -2648,8 +2648,8 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddb: ; HASWELL: # BB#0: @@ -2694,8 +2694,8 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_paddd: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddd: ; HASWELL: # BB#0: @@ -2736,8 +2736,8 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_paddq: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: @@ -2781,9 +2781,9 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_paddsb: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # BB#0: @@ -2828,9 +2828,9 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_paddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # BB#0: @@ -2876,8 +2876,8 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # BB#0: @@ -2923,8 +2923,8 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_paddusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # BB#0: @@ -2969,9 +2969,9 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_paddw: ; SANDY: # BB#0: -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddw: ; HASWELL: # BB#0: @@ -3015,9 +3015,9 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pand: ; SANDY: # BB#0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: @@ -3070,9 +3070,9 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pandn: ; SANDY: # BB#0: ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: @@ -3122,8 +3122,8 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pavgb: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # BB#0: @@ -3169,8 +3169,8 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pavgw: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # BB#0: @@ -3217,9 +3217,9 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # BB#0: @@ -3269,9 +3269,9 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # BB#0: @@ -3321,9 +3321,9 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # BB#0: @@ -3374,9 +3374,9 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # BB#0: @@ -3427,9 +3427,9 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # BB#0: @@ -3480,9 +3480,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # BB#0: @@ -3526,9 +3526,9 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: @@ -3570,9 +3570,9 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; ; SANDY-LABEL: test_pinsrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # BB#0: @@ -3620,9 +3620,9 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # BB#0: @@ -3669,8 +3669,8 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pmaxsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # BB#0: @@ -3716,8 +3716,8 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxub: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxub: ; HASWELL: # BB#0: @@ -3763,8 +3763,8 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # BB#0: @@ -3810,8 +3810,8 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminub: ; SANDY: # BB#0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminub: ; HASWELL: # BB#0: @@ -3851,8 +3851,8 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # BB#0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # BB#0: @@ -3891,7 +3891,7 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # BB#0: @@ -3932,9 +3932,9 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # BB#0: @@ -3975,9 +3975,9 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmullw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: @@ -4027,7 +4027,7 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # BB#0: @@ -4073,9 +4073,9 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_por: ; SANDY: # BB#0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: @@ -4126,9 +4126,9 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_psadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # BB#0: @@ -4176,9 +4176,9 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_pshufd: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50] +; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # BB#0: @@ -4226,10 +4226,10 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; ; SANDY-LABEL: test_pshufhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # BB#0: @@ -4278,9 +4278,9 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pshuflw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # BB#0: @@ -4326,10 +4326,10 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pslld: ; SANDY: # BB#0: -; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pslld: ; HASWELL: # BB#0: @@ -4378,7 +4378,7 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SANDY-LABEL: test_pslldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # BB#0: @@ -4417,10 +4417,10 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_psllq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllq: ; HASWELL: # BB#0: @@ -4468,10 +4468,10 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psllw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllw: ; HASWELL: # BB#0: @@ -4519,10 +4519,10 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrad: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrad: ; HASWELL: # BB#0: @@ -4570,10 +4570,10 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psraw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psraw: ; HASWELL: # BB#0: @@ -4621,10 +4621,10 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrld: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrld: ; HASWELL: # BB#0: @@ -4673,7 +4673,7 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SANDY-LABEL: test_psrldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # BB#0: @@ -4712,10 +4712,10 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_psrlq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # BB#0: @@ -4763,10 +4763,10 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psrlw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # BB#0: @@ -4816,8 +4816,8 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: @@ -4862,8 +4862,8 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psubd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: @@ -4904,8 +4904,8 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_psubq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: @@ -4950,8 +4950,8 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # BB#0: @@ -4997,8 +4997,8 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # BB#0: @@ -5044,8 +5044,8 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # BB#0: @@ -5091,8 +5091,8 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # BB#0: @@ -5138,8 +5138,8 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: @@ -5184,8 +5184,8 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_punpckhbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # BB#0: @@ -5231,9 +5231,9 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckhdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50] +; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # BB#0: @@ -5279,10 +5279,10 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # BB#0: -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # BB#0: @@ -5330,8 +5330,8 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpckhwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # BB#0: @@ -5375,9 +5375,9 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_punpcklbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # BB#0: @@ -5423,9 +5423,9 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50] +; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # BB#0: @@ -5472,9 +5472,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50] +; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # BB#0: @@ -5522,8 +5522,8 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpcklwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # BB#0: @@ -5567,9 +5567,9 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pxor: ; SANDY: # BB#0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: @@ -5616,9 +5616,9 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: @@ -5665,10 +5665,10 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [28:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: @@ -5720,11 +5720,11 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] +; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] +; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # BB#0: @@ -5771,8 +5771,8 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: @@ -5813,8 +5813,8 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_subsd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_subsd: ; HASWELL: # BB#0: @@ -5879,16 +5879,16 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SANDY-LABEL: test_ucomisd: ; SANDY: # BB#0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %cl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.33] -; SANDY-NEXT: sete %dl # sched: [1:0.33] +; SANDY-NEXT: setnp %al # sched: [1:1.00] +; SANDY-NEXT: sete %dl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # BB#0: @@ -5950,9 +5950,9 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpckhpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: @@ -6005,9 +6005,9 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: @@ -6053,10 +6053,10 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index 482b2fcab64..ef1ddae4532 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -31,8 +31,8 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: @@ -74,8 +74,8 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: @@ -116,9 +116,9 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_haddpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: @@ -159,9 +159,9 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: @@ -202,9 +202,9 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: @@ -245,9 +245,9 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] +; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: @@ -287,8 +287,8 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: @@ -330,9 +330,9 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: @@ -380,9 +380,9 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: @@ -430,9 +430,9 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 340b9abe887..1ab1598fcab 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -25,10 +25,10 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: @@ -65,9 +65,9 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: @@ -107,9 +107,9 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: @@ -150,9 +150,9 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] +; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: @@ -187,9 +187,9 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_dppd: ; SANDY: # BB#0: -; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dppd: ; HASWELL: # BB#0: @@ -224,9 +224,9 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: @@ -262,8 +262,8 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SANDY-LABEL: test_insertps: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_insertps: ; HASWELL: # BB#0: @@ -296,8 +296,8 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: @@ -328,9 +328,9 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # BB#0: @@ -367,8 +367,8 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packusdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # BB#0: @@ -411,8 +411,8 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SANDY-LABEL: test_pblendvb: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # BB#0: @@ -448,8 +448,8 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pblendw: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # BB#0: @@ -483,9 +483,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # BB#0: @@ -521,9 +521,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; ; SANDY-LABEL: test_pextrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # BB#0: @@ -558,9 +558,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: @@ -594,9 +594,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; ; SANDY-LABEL: test_pextrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] +; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # BB#0: @@ -630,9 +630,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] +; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: @@ -667,9 +667,9 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; ; SANDY-LABEL: test_phminposuw: ; SANDY: # BB#0: -; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # BB#0: @@ -704,9 +704,9 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; ; SANDY-LABEL: test_pinsrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # BB#0: @@ -740,9 +740,9 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; ; SANDY-LABEL: test_pinsrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # BB#0: @@ -778,10 +778,10 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; ; SANDY-LABEL: test_pinsrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # BB#0: @@ -819,8 +819,8 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsb: ; HASWELL: # BB#0: @@ -856,8 +856,8 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # BB#0: @@ -893,8 +893,8 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxud: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # BB#0: @@ -930,8 +930,8 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pmaxuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # BB#0: @@ -967,8 +967,8 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # BB#0: @@ -1004,8 +1004,8 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # BB#0: @@ -1041,8 +1041,8 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminud: ; SANDY: # BB#0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminud: ; HASWELL: # BB#0: @@ -1078,8 +1078,8 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # BB#0: @@ -1118,9 +1118,9 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # BB#0: @@ -1162,9 +1162,9 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # BB#0: @@ -1206,9 +1206,9 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # BB#0: @@ -1250,9 +1250,9 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # BB#0: @@ -1294,9 +1294,9 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # BB#0: @@ -1338,9 +1338,9 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # BB#0: @@ -1382,9 +1382,9 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] +; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # BB#0: @@ -1426,9 +1426,9 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # BB#0: @@ -1470,9 +1470,9 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # BB#0: @@ -1514,9 +1514,9 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # BB#0: @@ -1558,9 +1558,9 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # BB#0: @@ -1602,9 +1602,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50] +; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmovzxwq: ; HASWELL: # BB#0: @@ -1642,9 +1642,9 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmuldq: ; SANDY: # BB#0: -; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # BB#0: @@ -1680,9 +1680,9 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmulld: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # BB#0: @@ -1724,13 +1724,13 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_ptest: ; SANDY: # BB#0: -; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: setb %al # sched: [1:0.33] -; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50] -; SANDY-NEXT: setb %cl # sched: [1:0.33] +; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: setb %al # sched: [1:1.00] +; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-NEXT: setb %cl # sched: [1:1.00] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ptest: ; HASWELL: # BB#0: @@ -1778,9 +1778,9 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_roundpd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: @@ -1822,9 +1822,9 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_roundps: ; SANDY: # BB#0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00] +; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: @@ -1867,9 +1867,9 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-LABEL: test_roundsd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # BB#0: @@ -1912,9 +1912,9 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-LABEL: test_roundss: ; SANDY: # BB#0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_roundss: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/sse42-schedule.ll b/llvm/test/CodeGen/X86/sse42-schedule.ll index afc48bc57ee..7ce9ffdbd0e 100644 --- a/llvm/test/CodeGen/X86/sse42-schedule.ll +++ b/llvm/test/CodeGen/X86/sse42-schedule.ll @@ -26,9 +26,9 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: @@ -68,9 +68,9 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: @@ -112,7 +112,7 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: @@ -152,9 +152,9 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: @@ -196,7 +196,7 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: @@ -256,7 +256,7 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpestri: ; HASWELL: # BB#0: @@ -320,7 +320,7 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] ; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpestrm: ; HASWELL: # BB#0: @@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistri: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] ; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] ; SANDY-NEXT: # kill: %ECX<def> %ECX<kill> %RCX<def> ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpistri: ; HASWELL: # BB#0: @@ -416,9 +416,9 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistrm: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpistrm: ; HASWELL: # BB#0: @@ -453,9 +453,9 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_pcmpgtq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtq: ; HASWELL: # BB#0: diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index 8b7a0c0ec02..f24969a30c3 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -35,9 +35,9 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SANDY-LABEL: test_pabsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # BB#0: @@ -86,9 +86,9 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_pabsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] +; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: @@ -136,7 +136,7 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pabsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: @@ -182,8 +182,8 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_palignr: ; SANDY: # BB#0: ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_palignr: ; HASWELL: # BB#0: @@ -223,9 +223,9 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phaddd: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddd: ; HASWELL: # BB#0: @@ -274,9 +274,9 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddsw: ; HASWELL: # BB#0: @@ -317,9 +317,9 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddw: ; HASWELL: # BB#0: @@ -360,9 +360,9 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phsubd: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubd: ; HASWELL: # BB#0: @@ -411,9 +411,9 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubsw: ; HASWELL: # BB#0: @@ -454,9 +454,9 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] +; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubw: ; HASWELL: # BB#0: @@ -497,9 +497,9 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pmaddubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: ; HASWELL: # BB#0: @@ -538,8 +538,8 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhrsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: ; HASWELL: # BB#0: @@ -579,8 +579,8 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pshufb: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufb: ; HASWELL: # BB#0: @@ -630,8 +630,8 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psignb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignb: ; HASWELL: # BB#0: @@ -681,8 +681,8 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psignd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignd: ; HASWELL: # BB#0: @@ -732,8 +732,8 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psignw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [5:1.00] +; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignw: ; HASWELL: # BB#0: |

