diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/sse2-schedule.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 5948 |
1 files changed, 5899 insertions, 49 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 0c30bada475..db6d7a5c198 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -1,15 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_addpd: @@ -30,42 +38,84 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -96,42 +146,84 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addsd: ; SKX: # %bb.0: ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -165,6 +257,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -172,6 +271,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -179,6 +285,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -186,6 +299,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -193,6 +313,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andpd: ; SKX: # %bb.0: ; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -200,6 +327,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -207,6 +341,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -246,6 +387,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andnotpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -253,6 +401,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andnotpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -260,6 +415,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andnotpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -267,6 +429,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andnotpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -274,6 +443,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andnotpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -281,6 +457,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andnotpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -288,6 +471,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andnotpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -329,36 +519,71 @@ define void @test_clflush(i8* %p){ ; SLM-NEXT: clflush (%rdi) # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_clflush: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_clflush: ; SANDY: # %bb.0: ; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_clflush: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_clflush: ; HASWELL: # %bb.0: ; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_clflush: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_clflush: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_clflush: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_clflush: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_clflush: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_clflush: ; SKX: # %bb.0: ; SKX-NEXT: clflush (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_clflush: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_clflush: ; BTVER2: # %bb.0: ; BTVER2-NEXT: clflush (%rdi) # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_clflush: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_clflush: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50] @@ -390,6 +615,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmppd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmppd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -397,6 +629,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmppd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -404,6 +643,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmppd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmppd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -411,6 +657,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmppd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] @@ -418,14 +671,27 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmppd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmppd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] @@ -433,6 +699,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmppd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmppd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -467,42 +740,84 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmpsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmpsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmpsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmpsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmpsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmpsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmpsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmpsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmpsd: ; SKX: # %bb.0: ; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmpsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmpsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmpsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -562,6 +877,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_comisd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_comisd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -576,6 +905,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_comisd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_comisd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -590,6 +933,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_comisd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_comisd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -604,6 +961,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_comisd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_comisd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -618,6 +989,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_comisd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_comisd: ; SKX: # %bb.0: ; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -632,6 +1017,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_comisd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_comisd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -646,6 +1045,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_comisd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_comisd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -689,6 +1102,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtdq2pd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] @@ -696,6 +1116,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtdq2pd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] @@ -703,6 +1130,14 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtdq2pd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] @@ -710,6 +1145,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtdq2pd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -717,6 +1159,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtdq2pd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -724,6 +1173,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtdq2pd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -731,6 +1187,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtdq2pd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtdq2pd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00] @@ -769,6 +1232,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtdq2ps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -776,6 +1246,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtdq2ps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -783,6 +1260,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtdq2ps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtdq2ps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -790,6 +1274,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtdq2ps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] @@ -797,6 +1288,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtdq2ps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] @@ -804,6 +1302,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtdq2ps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] @@ -811,6 +1316,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtdq2ps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtdq2ps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00] @@ -847,6 +1359,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtpd2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -854,6 +1373,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtpd2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -861,6 +1387,14 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtpd2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtpd2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -868,6 +1402,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtpd2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -875,6 +1416,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtpd2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -882,6 +1430,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtpd2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -889,6 +1444,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtpd2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtpd2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00] @@ -926,6 +1488,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtpd2ps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -933,6 +1502,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtpd2ps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -940,6 +1516,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtpd2ps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtpd2ps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -947,6 +1531,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtpd2ps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] @@ -954,6 +1545,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtpd2ps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] @@ -961,6 +1559,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtpd2ps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] @@ -968,6 +1573,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtpd2ps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtpd2ps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00] @@ -1005,6 +1617,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtps2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1012,6 +1631,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtps2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1019,6 +1645,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtps2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtps2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1026,6 +1659,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtps2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] @@ -1033,6 +1673,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtps2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -1040,6 +1687,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtps2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -1047,6 +1701,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtps2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtps2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00] @@ -1084,6 +1745,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtps2pd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1091,6 +1759,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtps2pd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1098,6 +1773,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtps2pd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtps2pd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1105,6 +1787,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtps2pd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtps2pd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -1112,6 +1801,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtps2pd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtps2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -1119,6 +1815,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtps2pd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -1126,6 +1829,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtps2pd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtps2pd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00] @@ -1163,6 +1873,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] @@ -1170,6 +1887,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] @@ -1177,6 +1901,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] @@ -1184,6 +1915,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1191,6 +1929,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1198,6 +1943,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00] @@ -1205,6 +1957,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] @@ -1243,6 +2002,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] @@ -1250,6 +2016,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] @@ -1257,6 +2030,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] @@ -1264,6 +2044,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1271,6 +2058,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1278,6 +2072,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00] @@ -1285,6 +2086,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] @@ -1327,6 +2135,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2ss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1335,6 +2151,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2ss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1343,6 +2167,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2ss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2ss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1351,6 +2183,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2ss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2ss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1359,6 +2199,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2ss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2ss: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1367,6 +2215,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2ss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -1375,6 +2231,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2ss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2ss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] @@ -1411,6 +2275,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2sd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1418,6 +2289,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2sd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1425,6 +2303,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2sd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2sd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1432,6 +2317,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2sd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1439,6 +2331,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2sd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2sd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1446,6 +2345,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2sd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1453,6 +2359,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2sd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2sd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1488,6 +2401,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2sdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1495,6 +2415,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2sdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1502,6 +2429,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2sdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2sdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1509,6 +2443,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2sdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1516,6 +2457,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2sdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2sdq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1523,6 +2471,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2sdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1530,6 +2485,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2sdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2sdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1571,6 +2533,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtss2sd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] @@ -1579,6 +2549,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtss2sd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] @@ -1587,6 +2565,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtss2sd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtss2sd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] @@ -1595,6 +2581,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtss2sd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtss2sd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1603,6 +2597,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtss2sd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtss2sd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1611,6 +2613,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtss2sd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -1619,6 +2629,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtss2sd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtss2sd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] @@ -1656,6 +2674,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttpd2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1663,6 +2688,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttpd2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1670,6 +2702,14 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttpd2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttpd2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1677,6 +2717,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttpd2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttpd2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -1684,6 +2731,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttpd2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -1691,6 +2745,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttpd2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -1698,6 +2759,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttpd2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttpd2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00] @@ -1736,6 +2804,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttps2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1743,6 +2818,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttps2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1750,6 +2832,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttps2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttps2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1757,6 +2846,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttps2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttps2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] @@ -1764,6 +2860,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttps2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -1771,6 +2874,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttps2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -1778,6 +2888,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttps2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttps2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00] @@ -1813,6 +2930,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttsd2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] @@ -1820,6 +2944,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttsd2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] @@ -1827,6 +2958,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttsd2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttsd2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] @@ -1834,6 +2972,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttsd2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1841,6 +2986,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttsd2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttsd2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1848,6 +3000,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttsd2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00] @@ -1855,6 +3014,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttsd2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttsd2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] @@ -1890,6 +3056,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttsd2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] @@ -1897,6 +3070,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttsd2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] @@ -1904,6 +3084,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttsd2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttsd2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] @@ -1911,6 +3098,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttsd2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1918,6 +3112,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttsd2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttsd2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1925,6 +3126,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttsd2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00] @@ -1932,6 +3140,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttsd2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttsd2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] @@ -1964,42 +3179,84 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] ; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divpd: ; SKX: # %bb.0: ; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -2030,42 +3287,84 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] ; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divsd: ; SKX: # %bb.0: ; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -2099,36 +3398,71 @@ define void @test_lfence() { ; SLM-NEXT: lfence # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_lfence: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: lfence # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_lfence: ; SANDY: # %bb.0: ; SANDY-NEXT: lfence # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_lfence: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: lfence # sched: [2:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_lfence: ; HASWELL: # %bb.0: ; HASWELL-NEXT: lfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_lfence: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_lfence: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: lfence # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_lfence: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_lfence: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: lfence # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_lfence: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: lfence # sched: [2:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_lfence: ; SKX: # %bb.0: ; SKX-NEXT: lfence # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_lfence: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: lfence # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_lfence: ; BTVER2: # %bb.0: ; BTVER2-NEXT: lfence # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_lfence: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_lfence: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: lfence # sched: [1:0.50] @@ -2160,36 +3494,71 @@ define void @test_mfence() { ; SLM-NEXT: mfence # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mfence: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mfence # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mfence: ; SANDY: # %bb.0: ; SANDY-NEXT: mfence # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mfence: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mfence # sched: [2:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mfence: ; HASWELL: # %bb.0: ; HASWELL-NEXT: mfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mfence: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mfence: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: mfence # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mfence: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mfence: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: mfence # sched: [3:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mfence: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mfence # sched: [3:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mfence: ; SKX: # %bb.0: ; SKX-NEXT: mfence # sched: [3:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mfence: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mfence # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mfence: ; BTVER2: # %bb.0: ; BTVER2-NEXT: mfence # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mfence: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mfence: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: mfence # sched: [1:0.50] @@ -2219,36 +3588,71 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maskmovdqu: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovdqu: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maskmovdqu: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maskmovdqu: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maskmovdqu: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maskmovdqu: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maskmovdqu: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maskmovdqu: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maskmovdqu: ; SKX: # %bb.0: ; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maskmovdqu: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maskmovdqu: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maskmovdqu: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:?] @@ -2277,42 +3681,84 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: ; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2344,42 +3790,84 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxsd: ; SKX: # %bb.0: ; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2411,42 +3899,84 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: ; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2478,42 +4008,84 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minsd: ; SKX: # %bb.0: ; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2548,6 +4120,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movapd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movapd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2555,6 +4134,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movapd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movapd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2562,6 +4148,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movapd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movapd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] @@ -2569,6 +4162,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movapd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2576,6 +4176,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movapd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2583,6 +4190,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movapd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movapd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] @@ -2590,6 +4204,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movapd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movapd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50] @@ -2624,6 +4245,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2631,6 +4259,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2638,6 +4273,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] @@ -2645,6 +4287,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2652,6 +4301,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movdqa: ; SKX: # %bb.0: ; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2659,6 +4315,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] @@ -2666,6 +4329,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50] @@ -2700,6 +4370,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movdqu: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movdqu: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2707,6 +4384,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movdqu: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2714,6 +4398,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movdqu: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movdqu: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] @@ -2721,6 +4412,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movdqu: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movdqu: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2728,6 +4426,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movdqu: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movdqu: ; SKX: # %bb.0: ; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2735,6 +4440,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movdqu: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] @@ -2742,6 +4454,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movdqu: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movdqu: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50] @@ -2785,6 +4504,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2795,6 +4524,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] +; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2805,6 +4544,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2815,6 +4564,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2825,16 +4584,36 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movd: ; SKX: # %bb.0: -; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovd %edi, %xmm2 # sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: vmovd %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2845,6 +4624,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] @@ -2896,6 +4685,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movd_64: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movd_64: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2906,6 +4705,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movd_64: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] +; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2916,6 +4725,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movd_64: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movd_64: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2926,6 +4745,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movd_64: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movd_64: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2936,16 +4765,36 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movd_64: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movd_64: ; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vmovq %rdi, %xmm2 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: vmovq %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movd_64: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] @@ -2956,6 +4805,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movd_64: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movd_64: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] @@ -2998,6 +4857,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movhpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movhpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] @@ -3005,6 +4871,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movhpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3012,6 +4885,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movhpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movhpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3019,6 +4899,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movhpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3026,6 +4913,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movhpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movhpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3033,6 +4927,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movhpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3040,6 +4941,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movhpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movhpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] @@ -3077,6 +4985,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movlpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movlpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] @@ -3084,6 +4999,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movlpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3091,6 +5013,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movlpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movlpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3098,6 +5027,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movlpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3105,6 +5041,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movlpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movlpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3112,6 +5055,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movlpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3119,6 +5069,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movlpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movlpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] @@ -3152,36 +5109,71 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movmskpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movmskpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movmskpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movmskpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movmskpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movmskpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movmskpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movmskpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movmskpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movmskpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movmskpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00] @@ -3212,42 +5204,84 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntdqa: ; SKX: # %bb.0: ; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] @@ -3277,42 +5311,84 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: ; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] @@ -3345,6 +5421,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movq_mem: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movq_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] @@ -3352,6 +5435,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movq_mem: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3359,6 +5449,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movq_mem: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movq_mem: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3366,6 +5463,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movq_mem: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movq_mem: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3373,6 +5477,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movq_mem: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movq_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3380,6 +5491,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movq_mem: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -3387,6 +5505,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movq_mem: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movq_mem: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] @@ -3422,42 +5547,84 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movq_reg: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movq_reg: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movq_reg: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movq_reg: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movq_reg: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movq_reg: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movq_reg: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movq_reg: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movq_reg: ; SKX: # %bb.0: ; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movq_reg: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] ; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movq_reg: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movq_reg: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] @@ -3490,6 +5657,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movsd_mem: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] @@ -3497,6 +5671,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movsd_mem: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3504,6 +5685,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movsd_mem: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movsd_mem: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3511,6 +5699,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movsd_mem: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movsd_mem: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3518,6 +5713,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movsd_mem: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movsd_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3525,6 +5727,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movsd_mem: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] @@ -3532,6 +5741,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movsd_mem: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movsd_mem: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] @@ -3567,36 +5783,78 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movsd_reg: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsd_reg: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movsd_reg: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movsd_reg: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movsd_reg: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movsd_reg: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movsd_reg: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movsd_reg: ; SKX: # %bb.0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movsd_reg: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movsd_reg: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movsd_reg: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] @@ -3627,6 +5885,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movupd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movupd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3634,6 +5899,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movupd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movupd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3641,6 +5913,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movupd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movupd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] @@ -3648,6 +5927,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movupd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3655,6 +5941,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movupd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3662,6 +5955,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movupd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movupd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] @@ -3669,6 +5969,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movupd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movupd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50] @@ -3700,42 +6007,84 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: ; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00] +; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -3766,42 +6115,84 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulsd: ; SKX: # %bb.0: ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00] +; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -3835,6 +6226,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_orpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_orpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3842,6 +6240,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_orpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_orpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3849,6 +6254,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_orpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_orpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3856,6 +6268,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_orpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_orpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3863,6 +6282,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_orpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_orpd: ; SKX: # %bb.0: ; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3870,6 +6296,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_orpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_orpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3877,6 +6310,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_orpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_orpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -3917,42 +6357,84 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packssdw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packssdw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packssdw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packssdw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packssdw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packssdw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packssdw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packssdw: ; SKX: # %bb.0: ; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packssdw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packssdw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packssdw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -3989,42 +6471,84 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packsswb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packsswb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packsswb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packsswb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packsswb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packsswb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packsswb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packsswb: ; SKX: # %bb.0: ; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packsswb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packsswb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packsswb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4061,42 +6585,84 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packuswb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packuswb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packuswb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packuswb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packuswb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packuswb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packuswb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packuswb: ; SKX: # %bb.0: ; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packuswb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packuswb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packuswb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4133,42 +6699,84 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4203,42 +6811,84 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddd: ; SKX: # %bb.0: ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4269,42 +6919,84 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddq: ; SKX: # %bb.0: ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4339,42 +7031,84 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddsb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4410,42 +7144,84 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddsw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4481,42 +7257,84 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddusb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddusb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddusb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddusb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddusb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddusb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddusb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddusb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddusb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddusb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddusb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4552,42 +7370,84 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddusw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddusw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddusw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddusw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddusw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddusw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddusw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddusw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddusw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddusw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddusw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4623,42 +7483,84 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4692,6 +7594,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pand: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pand: ; SANDY: # %bb.0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4699,6 +7608,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pand: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pand: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4706,6 +7622,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pand: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pand: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4713,6 +7636,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pand: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4720,6 +7650,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pand: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pand: ; SKX: # %bb.0: ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4727,6 +7664,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pand: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pand: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4734,6 +7678,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pand: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4775,6 +7726,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pandn: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pandn: ; SANDY: # %bb.0: ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4782,6 +7742,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pandn: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pandn: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4789,6 +7758,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pandn: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pandn: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4796,6 +7774,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pandn: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4803,6 +7790,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pandn: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pandn: ; SKX: # %bb.0: ; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4810,6 +7806,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pandn: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pandn: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4817,6 +7822,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pandn: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4855,42 +7869,84 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pavgb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pavgb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pavgb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pavgb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pavgb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pavgb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pavgb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pavgb: ; SKX: # %bb.0: ; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pavgb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pavgb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pavgb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4935,42 +7991,84 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pavgw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pavgw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pavgw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pavgw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pavgw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pavgw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pavgw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pavgw: ; SKX: # %bb.0: ; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pavgw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pavgw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pavgw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5016,6 +8114,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5023,6 +8128,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5030,6 +8142,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5037,6 +8156,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5044,14 +8170,27 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5059,6 +8198,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5097,6 +8243,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5104,6 +8257,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5111,6 +8271,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5118,6 +8285,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5125,14 +8299,27 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5140,6 +8327,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5178,6 +8372,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5185,6 +8386,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5192,6 +8400,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5199,6 +8414,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5206,14 +8428,27 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5221,6 +8456,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5260,6 +8502,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5267,6 +8517,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5274,6 +8532,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5281,6 +8547,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5288,14 +8562,29 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5303,6 +8592,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5342,6 +8639,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5349,6 +8654,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5356,6 +8669,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5363,6 +8684,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5370,14 +8699,29 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5385,6 +8729,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5424,6 +8776,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5431,6 +8791,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5438,6 +8806,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5445,6 +8821,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5452,14 +8836,29 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5467,6 +8866,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5500,42 +8907,84 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; SLM-NEXT: # kill: def $ax killed $ax killed $eax ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: # kill: def $ax killed $ax killed $eax ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrw: ; SKX: # %bb.0: ; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00] @@ -5568,42 +9017,84 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrw: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -5634,42 +9125,84 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaddwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaddwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaddwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaddwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaddwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaddwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaddwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaddwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaddwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -5706,42 +9239,84 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsw: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5777,42 +9352,84 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxub: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxub: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxub: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxub: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxub: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxub: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxub: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxub: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxub: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxub: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxub: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxub: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5848,42 +9465,84 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsw: ; SKX: # %bb.0: ; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5919,42 +9578,84 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminub: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminub: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminub: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminub: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminub: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminub: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminub: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminub: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminub: ; SKX: # %bb.0: ; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminub: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminub: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminub: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminub: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5985,36 +9686,71 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovmskb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovmskb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovmskb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovmskb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovmskb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovmskb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovmskb: ; SKX: # %bb.0: ; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovmskb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovmskb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovmskb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] @@ -6043,42 +9779,84 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulhuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulhuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulhuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulhuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulhuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulhuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulhuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: ; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulhuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulhuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulhuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6110,42 +9888,84 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulhw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulhw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulhw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulhw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulhw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulhw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulhw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: ; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulhw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulhw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulhw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6177,42 +9997,84 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmullw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmullw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmullw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmullw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmullw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmullw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmullw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: ; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmullw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmullw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6243,42 +10105,84 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmuludq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmuludq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmuludq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmuludq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmuludq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmuludq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmuludq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: ; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmuludq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmuludq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmuludq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6314,6 +10218,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_por: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_por: ; SANDY: # %bb.0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6321,6 +10232,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_por: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_por: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6328,6 +10246,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_por: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_por: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6335,6 +10260,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_por: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6342,6 +10274,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_por: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_por: ; SKX: # %bb.0: ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6349,6 +10288,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_por: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_por: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6356,6 +10302,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_por: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_por: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -6392,42 +10345,84 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psadbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psadbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psadbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psadbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psadbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psadbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psadbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] +; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psadbw: ; SKX: # %bb.0: ; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psadbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psadbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psadbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -6465,6 +10460,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshufd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshufd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] @@ -6472,6 +10474,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshufd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6479,6 +10488,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshufd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshufd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6486,6 +10502,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshufd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6493,6 +10516,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshufd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshufd: ; SKX: # %bb.0: ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6500,6 +10530,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshufd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] @@ -6507,6 +10544,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshufd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshufd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] @@ -6544,6 +10588,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshufhw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshufhw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] @@ -6551,6 +10602,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshufhw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6558,6 +10616,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshufhw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshufhw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6565,6 +10630,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshufhw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6572,6 +10644,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshufhw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshufhw: ; SKX: # %bb.0: ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6579,6 +10658,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshufhw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] @@ -6586,6 +10672,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshufhw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshufhw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] @@ -6623,6 +10716,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshuflw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshuflw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] @@ -6630,6 +10730,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshuflw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6637,6 +10744,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshuflw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshuflw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6644,6 +10758,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshuflw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6651,6 +10772,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshuflw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshuflw: ; SKX: # %bb.0: ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6658,6 +10786,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshuflw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] @@ -6665,6 +10800,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshuflw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshuflw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] @@ -6700,6 +10842,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pslld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pslld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6707,6 +10856,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pslld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pslld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6714,6 +10870,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pslld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pslld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6721,6 +10884,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pslld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6728,6 +10898,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pslld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pslld: ; SKX: # %bb.0: ; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6735,6 +10912,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pslld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pslld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6742,6 +10926,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pslld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pslld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6779,36 +10970,71 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pslldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pslldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pslldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pslldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pslldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pslldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pslldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pslldq: ; SKX: # %bb.0: ; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pslldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pslldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pslldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] @@ -6839,6 +11065,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psllq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psllq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6846,6 +11079,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psllq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psllq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6853,6 +11093,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psllq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psllq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6860,6 +11107,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psllq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6867,6 +11121,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psllq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psllq: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6874,6 +11135,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psllq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psllq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6881,6 +11149,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psllq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psllq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6918,6 +11193,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psllw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psllw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6925,6 +11207,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psllw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psllw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6932,6 +11221,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psllw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psllw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6939,6 +11235,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psllw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6946,6 +11249,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psllw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psllw: ; SKX: # %bb.0: ; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6953,6 +11263,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psllw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psllw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6960,6 +11277,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psllw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psllw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6997,6 +11321,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrad: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrad: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7004,6 +11335,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrad: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrad: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7011,6 +11349,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrad: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrad: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7018,6 +11363,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrad: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7025,6 +11377,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrad: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrad: ; SKX: # %bb.0: ; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7032,6 +11391,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrad: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrad: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7039,6 +11405,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrad: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrad: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7076,6 +11449,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psraw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psraw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7083,6 +11463,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psraw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psraw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7090,6 +11477,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psraw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psraw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7097,6 +11491,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psraw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7104,6 +11505,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psraw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psraw: ; SKX: # %bb.0: ; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7111,6 +11519,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psraw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psraw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7118,6 +11533,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psraw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psraw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7155,6 +11577,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7162,6 +11591,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7169,6 +11605,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7176,6 +11619,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7183,6 +11633,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrld: ; SKX: # %bb.0: ; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7190,6 +11647,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7197,6 +11661,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7234,36 +11705,71 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrldq: ; SKX: # %bb.0: ; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] @@ -7294,6 +11800,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrlq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrlq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7301,6 +11814,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrlq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7308,6 +11828,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrlq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrlq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7315,6 +11842,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrlq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7322,6 +11856,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrlq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrlq: ; SKX: # %bb.0: ; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7329,6 +11870,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrlq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7336,6 +11884,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrlq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrlq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7373,6 +11928,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrlw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrlw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7380,6 +11942,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrlw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7387,6 +11956,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrlw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrlw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7394,6 +11970,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrlw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7401,6 +11984,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrlw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrlw: ; SKX: # %bb.0: ; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7408,6 +11998,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrlw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7415,6 +12012,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrlw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrlw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7453,42 +12057,84 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7523,42 +12169,84 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubd: ; SKX: # %bb.0: ; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7589,42 +12277,84 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubq: ; SKX: # %bb.0: ; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7659,42 +12389,84 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubsb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7730,42 +12502,84 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubsw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7801,42 +12615,84 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubusb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubusb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubusb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubusb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubusb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubusb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubusb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubusb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubusb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubusb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubusb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7872,42 +12728,84 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubusw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubusw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubusw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubusw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubusw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubusw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubusw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubusw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubusw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubusw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubusw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7943,42 +12841,84 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -8013,42 +12953,84 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhbw: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] @@ -8084,6 +13066,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -8091,6 +13080,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8098,6 +13094,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8105,6 +13108,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8112,6 +13122,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8119,6 +13136,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -8126,6 +13150,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] @@ -8161,6 +13192,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhqdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -8168,6 +13206,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhqdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8175,6 +13220,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhqdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhqdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8182,6 +13234,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhqdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8189,6 +13248,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhqdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhqdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8196,6 +13262,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhqdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -8203,6 +13276,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhqdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhqdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] @@ -8239,42 +13319,84 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhwd: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] @@ -8309,42 +13431,84 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklbw: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] @@ -8380,6 +13544,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -8387,6 +13558,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8394,6 +13572,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8401,6 +13586,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8408,6 +13600,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckldq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8415,6 +13614,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -8422,6 +13628,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] @@ -8457,6 +13670,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklqdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -8464,6 +13684,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklqdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8471,6 +13698,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklqdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklqdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8478,6 +13712,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklqdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8485,6 +13726,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklqdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklqdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8492,6 +13740,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklqdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -8499,6 +13754,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklqdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklqdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] @@ -8535,42 +13797,84 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklwd: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] @@ -8604,6 +13908,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pxor: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pxor: ; SANDY: # %bb.0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8611,6 +13922,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pxor: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pxor: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8618,6 +13936,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pxor: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pxor: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8625,6 +13950,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pxor: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8632,6 +13964,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pxor: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pxor: ; SKX: # %bb.0: ; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8639,6 +13978,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pxor: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pxor: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -8646,6 +13992,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pxor: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -8681,6 +14034,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_shufpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_shufpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8688,6 +14048,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_shufpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8695,6 +14062,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_shufpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_shufpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8702,6 +14076,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_shufpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8709,6 +14090,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_shufpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_shufpd: ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8716,6 +14104,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_shufpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] @@ -8723,6 +14118,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_shufpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_shufpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] @@ -8759,6 +14161,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00] +; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] @@ -8766,6 +14175,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] @@ -8773,6 +14189,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] @@ -8780,6 +14203,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] @@ -8787,6 +14217,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00] +; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtpd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] @@ -8794,6 +14231,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] +; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00] @@ -8801,6 +14245,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00] @@ -8842,6 +14293,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8850,6 +14309,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8858,6 +14325,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8866,6 +14341,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] @@ -8874,6 +14357,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00] +; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtsd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] @@ -8882,6 +14373,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00] +; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] @@ -8890,6 +14389,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] @@ -8924,42 +14431,84 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: ; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -8990,42 +14539,84 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subsd: ; SKX: # %bb.0: ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -9080,6 +14671,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ucomisd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ucomisd: ; SANDY: # %bb.0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9094,6 +14699,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ucomisd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9108,6 +14727,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ucomisd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ucomisd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9122,6 +14755,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ucomisd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ucomisd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9136,6 +14783,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ucomisd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ucomisd: ; SKX: # %bb.0: ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9150,6 +14811,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ucomisd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9164,6 +14839,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ucomisd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ucomisd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9207,6 +14896,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpckhpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpckhpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9214,6 +14910,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpckhpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9221,6 +14924,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpckhpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpckhpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9228,6 +14938,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpckhpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9235,6 +14952,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpckhpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpckhpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9242,6 +14966,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpckhpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -9249,6 +14980,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpckhpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpckhpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -9290,6 +15028,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpcklpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpcklpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9297,6 +15044,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpcklpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9304,6 +15060,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpcklpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpcklpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9311,6 +15076,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpcklpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9318,6 +15092,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpcklpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpcklpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9325,6 +15108,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpcklpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -9332,6 +15124,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpcklpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpcklpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -9367,6 +15168,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_xorpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_xorpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9374,6 +15182,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_xorpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9381,6 +15196,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_xorpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_xorpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9388,6 +15210,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_xorpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -9395,6 +15224,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_xorpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_xorpd: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -9402,6 +15238,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_xorpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -9409,6 +15252,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_xorpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_xorpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |