diff options
-rw-r--r-- | llvm/test/CodeGen/X86/sse-schedule.ll | 2440 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 5948 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse3-schedule.ll | 562 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse41-schedule.ll | 2338 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/sse42-schedule.ll | 623 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/ssse3-schedule.ll | 730 |
6 files changed, 12529 insertions, 112 deletions
diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 91b6ce880aa..75343257deb 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -1,15 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 + +; FIXME: we should really use -mattr=-sse2 here but some of the comparison tests don't work without access to legal <4 x i32> types. define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_addps: @@ -30,42 +40,84 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: addps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addps: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addps: ; SKX: # %bb.0: ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -96,42 +148,84 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; SLM-NEXT: addss (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addss: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addss: ; SKX: # %bb.0: ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -166,42 +260,84 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: andps (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andps: ; SANDY: # %bb.0: ; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andps: ; SKX: # %bb.0: ; SKX-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -240,42 +376,84 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: andnps (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andnotps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andnotps: ; SANDY: # %bb.0: ; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andnotps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andnotps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andnotps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andnotps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andnotps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andnotps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andnotps: ; SKX: # %bb.0: ; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andnotps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andnotps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andnotps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -315,6 +493,13 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmpps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -322,6 +507,13 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmpps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -329,6 +521,13 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmpps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmpps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -336,6 +535,13 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BROADWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmpps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmpps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.50] @@ -343,14 +549,27 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SKYLAKE-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmpps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmpps: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmpps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [2:1.00] @@ -358,6 +577,13 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; BTVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmpps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmpps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -392,42 +618,84 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; SLM-NEXT: cmpeqss (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmpss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmpss: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmpss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmpss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmpss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmpss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmpss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmpss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmpss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmpss: ; SKX: # %bb.0: ; SKX-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmpss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmpss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmpss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmpss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -487,6 +755,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_comiss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_comiss: ; SANDY: # %bb.0: ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -501,6 +783,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_comiss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_comiss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -515,6 +811,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_comiss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_comiss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -529,6 +839,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_comiss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_comiss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -543,6 +867,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_comiss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_comiss: ; SKX: # %bb.0: ; SKX-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -557,6 +895,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_comiss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_comiss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -571,6 +923,20 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_comiss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_comiss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -614,6 +980,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2ss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] +; SANDY-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2ss: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00] @@ -621,6 +994,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2ss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2ss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -628,6 +1008,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2ss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2ss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -635,6 +1022,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2ss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -642,6 +1036,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2ss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2ss: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -649,6 +1050,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2ss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2ss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [3:1.00] @@ -656,6 +1064,13 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2ss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2ss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -691,6 +1106,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2ssq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] +; SANDY-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2ssq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] @@ -698,6 +1120,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2ssq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] +; HASWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2ssq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] @@ -705,6 +1134,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2ssq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] +; BROADWELL-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2ssq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] @@ -712,6 +1148,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2ssq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] +; SKYLAKE-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2ssq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] @@ -719,6 +1162,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2ssq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [6:2.00] +; SKX-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2ssq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [6:2.00] @@ -726,6 +1176,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2ssq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2ssq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [3:1.00] @@ -733,6 +1190,13 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2ssq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2ssq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -768,6 +1232,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtss2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtss2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] @@ -775,6 +1246,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtss2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtss2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] @@ -782,6 +1260,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtss2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtss2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [9:1.00] @@ -789,6 +1274,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtss2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtss2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] @@ -796,6 +1288,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtss2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtss2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvtss2si %xmm0, %ecx # sched: [6:1.00] @@ -803,6 +1302,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtss2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtss2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00] @@ -810,6 +1316,13 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtss2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtss2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtss2si (%rdi), %eax # sched: [12:1.00] @@ -848,6 +1361,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtss2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtss2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] @@ -855,6 +1375,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtss2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtss2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] @@ -862,6 +1389,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtss2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtss2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [9:1.00] @@ -869,6 +1403,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtss2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtss2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] @@ -876,6 +1417,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtss2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [11:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtss2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtss2si %xmm0, %rcx # sched: [6:1.00] @@ -883,6 +1431,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtss2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtss2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00] @@ -890,6 +1445,13 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtss2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtss2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtss2si (%rdi), %rax # sched: [12:1.00] @@ -928,6 +1490,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttss2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttss2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] @@ -935,6 +1504,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttss2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttss2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttss2si %xmm0, %ecx # sched: [4:1.00] @@ -942,6 +1518,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttss2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttss2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [9:1.00] @@ -949,6 +1532,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttss2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttss2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] @@ -956,6 +1546,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttss2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [7:1.00] +; SKX-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttss2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvttss2si %xmm0, %ecx # sched: [7:1.00] @@ -963,6 +1560,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttss2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttss2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00] @@ -970,6 +1574,13 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttss2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttss2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttss2si (%rdi), %eax # sched: [12:1.00] @@ -1005,6 +1616,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttss2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttss2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] @@ -1012,6 +1630,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttss2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] +; HASWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttss2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] @@ -1019,6 +1644,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttss2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [10:1.00] +; BROADWELL-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttss2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [9:1.00] @@ -1026,6 +1658,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttss2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttss2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] @@ -1033,6 +1672,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttss2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [7:1.00] +; SKX-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttss2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttss2si %xmm0, %rcx # sched: [7:1.00] @@ -1040,6 +1686,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttss2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttss2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00] @@ -1047,6 +1700,13 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttss2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttss2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttss2si (%rdi), %rax # sched: [12:1.00] @@ -1079,42 +1739,84 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: divps (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divps: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00] +; BROADWELL-SSE-NEXT: divps (%rdi), %xmm0 # sched: [16:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; BROADWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; SKYLAKE-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divps %xmm1, %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: divps (%rdi), %xmm0 # sched: [17:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divps: ; SKX: # %bb.0: ; SKX-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divps %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divps (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -1145,42 +1847,84 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; SLM-NEXT: divss (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00] +; SANDY-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divss: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] ; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [18:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00] +; BROADWELL-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; BROADWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; SKYLAKE-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divss %xmm1, %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: divss (%rdi), %xmm0 # sched: [16:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divss: ; SKX: # %bb.0: ; SKX-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [11:1.00] ; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divss %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divss (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -1211,42 +1955,84 @@ define void @test_ldmxcsr(i32 %a0) { ; SLM-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ldmxcsr: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # %bb.0: ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ldmxcsr: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; HASWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ldmxcsr: ; HASWELL: # %bb.0: ; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ldmxcsr: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ldmxcsr: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; BROADWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ldmxcsr: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ldmxcsr: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; SKYLAKE-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ldmxcsr: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SKX-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ldmxcsr: ; SKX: # %bb.0: ; SKX-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ldmxcsr: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BTVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ldmxcsr: ; BTVER2: # %bb.0: ; BTVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; BTVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ldmxcsr: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ldmxcsr: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:0.50] @@ -1279,42 +2065,84 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: maxps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxps: ; SKX: # %bb.0: ; SKX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1346,42 +2174,84 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: maxss (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxss: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxss: ; SKX: # %bb.0: ; SKX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1413,42 +2283,84 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: minps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minps: ; SANDY: # %bb.0: ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minps: ; SKX: # %bb.0: ; SKX-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1480,42 +2392,84 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: minss (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minss: ; SANDY: # %bb.0: ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minss: ; SKX: # %bb.0: ; SKX-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -1550,6 +2504,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movaps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movaps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] @@ -1557,6 +2518,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movaps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movaps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] @@ -1564,6 +2532,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movaps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movaps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:0.50] @@ -1571,6 +2546,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movaps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movaps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] @@ -1578,6 +2560,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movaps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movaps: ; SKX: # %bb.0: ; SKX-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] @@ -1585,6 +2574,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movaps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movaps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [5:1.00] @@ -1592,6 +2588,13 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; BTVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movaps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movaps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovaps (%rdi), %xmm0 # sched: [8:0.50] @@ -1628,36 +2631,71 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movhlps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movhlps: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movhlps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movhlps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movhlps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movhlps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movhlps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movhlps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movhlps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movhlps: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movhlps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movhlps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movhlps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movhlps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50] @@ -1689,9 +2727,18 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SLM: # %bb.0: ; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] ; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: pextrq $1, %xmm1, (%rdi) # sched: [4:2.00] +; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movhps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movhps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] @@ -1699,6 +2746,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movhps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movhps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1706,6 +2761,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movhps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movhps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1713,6 +2776,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movhps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movhps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1720,6 +2791,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movhps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1727,6 +2806,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movhps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] +; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movhps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -1734,6 +2821,14 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movhps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movhps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] @@ -1771,42 +2866,84 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movlhps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movlhps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movlhps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movlhps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movlhps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; BROADWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movlhps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movlhps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movlhps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movlhps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movlhps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movlhps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movlhps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -1839,6 +2976,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movlps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movlps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] @@ -1846,6 +2990,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movlps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movlps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1853,6 +3004,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movlps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movlps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1860,6 +3018,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movlps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movlps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1867,6 +3032,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movlps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movlps: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1874,6 +3046,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movlps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movlps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -1881,6 +3060,13 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movlps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movlps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] @@ -1915,36 +3101,71 @@ define i32 @test_movmskps(<4 x float> %a0) { ; SLM-NEXT: movmskps %xmm0, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movmskps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movmskps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movmskps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movmskps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movmskps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movmskps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movmskps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movmskps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movmskps: ; SKX: # %bb.0: ; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movmskps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movmskps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movmskps %xmm0, %eax # sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movmskps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovmskps %xmm0, %eax # sched: [1:1.00] @@ -1976,36 +3197,71 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntps: ; SKX: # %bb.0: ; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:0.50] @@ -2036,6 +3292,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SLM-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movss_mem: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movss_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] @@ -2043,6 +3306,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movss_mem: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movss_mem: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] @@ -2050,6 +3320,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movss_mem: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movss_mem: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] @@ -2057,6 +3334,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; BROADWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movss_mem: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movss_mem: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] @@ -2064,6 +3348,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SKYLAKE-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movss_mem: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: addss %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movss_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] @@ -2071,6 +3362,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movss_mem: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movss_mem: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2078,6 +3376,13 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; BTVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movss_mem: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movss_mem: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] @@ -2109,39 +3414,74 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; ; SLM-LABEL: test_movss_reg: ; SLM: # %bb.0: -; SLM-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; SLM-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movss_reg: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movss_reg: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movss_reg: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movss_reg: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movss_reg: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movss_reg: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movss_reg: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movss_reg: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movss_reg: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movss_reg: ; SKX: # %bb.0: -; SKX-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] +; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movss_reg: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movss_reg: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movss_reg: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movss_reg: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] @@ -2172,6 +3512,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SLM-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movups: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movups: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -2179,6 +3526,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movups: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movups: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -2186,6 +3540,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movups: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movups: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [5:0.50] @@ -2193,6 +3554,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movups: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movups: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -2200,6 +3568,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movups: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addps %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movups: ; SKX: # %bb.0: ; SKX-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -2207,6 +3582,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movups: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movups: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [5:1.00] @@ -2214,6 +3596,13 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; BTVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movups: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movups (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movups: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovups (%rdi), %xmm0 # sched: [8:0.50] @@ -2245,42 +3634,84 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: mulps (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulps: ; SKX: # %bb.0: ; SKX-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -2311,42 +3742,84 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; SLM-NEXT: mulss (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulss: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulss: ; SKX: # %bb.0: ; SKX-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -2381,42 +3854,84 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SLM-NEXT: orps (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_orps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_orps: ; SANDY: # %bb.0: ; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_orps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_orps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_orps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_orps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_orps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_orps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_orps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_orps: ; SKX: # %bb.0: ; SKX-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_orps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_orps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_orps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_orps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -2463,6 +3978,16 @@ define void @test_prefetch(i8* %a0) optsize { ; SLM-NEXT: #NO_APP ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_prefetch: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: #APP +; SANDY-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SANDY-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] +; SANDY-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] +; SANDY-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] +; SANDY-SSE-NEXT: #NO_APP +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_prefetch: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP @@ -2473,6 +3998,16 @@ define void @test_prefetch(i8* %a0) optsize { ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_prefetch: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: #APP +; HASWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; HASWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] +; HASWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] +; HASWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] +; HASWELL-SSE-NEXT: #NO_APP +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_prefetch: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP @@ -2483,6 +4018,16 @@ define void @test_prefetch(i8* %a0) optsize { ; HASWELL-NEXT: #NO_APP ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_prefetch: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: #APP +; BROADWELL-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; BROADWELL-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] +; BROADWELL-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] +; BROADWELL-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] +; BROADWELL-SSE-NEXT: #NO_APP +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_prefetch: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP @@ -2493,6 +4038,16 @@ define void @test_prefetch(i8* %a0) optsize { ; BROADWELL-NEXT: #NO_APP ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_prefetch: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: #APP +; SKYLAKE-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SKYLAKE-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] +; SKYLAKE-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] +; SKYLAKE-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] +; SKYLAKE-SSE-NEXT: #NO_APP +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_prefetch: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP @@ -2503,6 +4058,16 @@ define void @test_prefetch(i8* %a0) optsize { ; SKYLAKE-NEXT: #NO_APP ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_prefetch: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: #APP +; SKX-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; SKX-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50] +; SKX-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50] +; SKX-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50] +; SKX-SSE-NEXT: #NO_APP +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_prefetch: ; SKX: # %bb.0: ; SKX-NEXT: #APP @@ -2513,6 +4078,16 @@ define void @test_prefetch(i8* %a0) optsize { ; SKX-NEXT: #NO_APP ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_prefetch: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: #APP +; BTVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: #NO_APP +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_prefetch: ; BTVER2: # %bb.0: ; BTVER2-NEXT: #APP @@ -2523,6 +4098,16 @@ define void @test_prefetch(i8* %a0) optsize { ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_prefetch: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: #APP +; ZNVER1-SSE-NEXT: prefetchnta (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: prefetcht0 (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: prefetcht1 (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: prefetcht2 (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: #NO_APP +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_prefetch: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: #APP @@ -2560,6 +4145,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_rcpps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rcpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] @@ -2567,6 +4159,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_rcpps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] +; HASWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] @@ -2574,6 +4173,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_rcpps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_rcpps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] @@ -2581,6 +4187,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_rcpps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_rcpps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] @@ -2588,6 +4201,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_rcpps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_rcpps: ; SKX: # %bb.0: ; SKX-NEXT: vrcpps %xmm0, %xmm0 # sched: [4:1.00] @@ -2595,6 +4215,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_rcpps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [7:1.00] @@ -2602,6 +4229,13 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_rcpps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:0.50] +; ZNVER1-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [12:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_rcpps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vrcpps (%rdi), %xmm1 # sched: [12:0.50] @@ -2643,6 +4277,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_rcpss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rcpss: ; SANDY: # %bb.0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2651,6 +4293,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_rcpss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_rcpss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2659,6 +4309,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_rcpss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_rcpss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2667,6 +4325,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_rcpss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_rcpss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2675,6 +4341,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_rcpss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [4:1.00] +; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [4:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_rcpss: ; SKX: # %bb.0: ; SKX-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2683,6 +4357,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_rcpss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [7:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2691,6 +4373,14 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_rcpss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [12:0.50] +; ZNVER1-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [12:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_rcpss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] @@ -2732,6 +4422,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_rsqrtps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] +; SANDY-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rsqrtps: ; SANDY: # %bb.0: ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] @@ -2739,6 +4436,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_rsqrtps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] +; HASWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] @@ -2746,6 +4450,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_rsqrtps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_rsqrtps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] @@ -2753,6 +4464,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_rsqrtps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_rsqrtps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] @@ -2760,6 +4478,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_rsqrtps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [4:1.00] +; SKX-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [10:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_rsqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [4:1.00] @@ -2767,6 +4492,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_rsqrtps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [7:1.00] @@ -2774,6 +4506,13 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_rsqrtps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:0.50] +; ZNVER1-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [12:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_rsqrtps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [12:0.50] @@ -2815,6 +4554,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_rsqrtss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rsqrtss: ; SANDY: # %bb.0: ; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2823,6 +4570,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_rsqrtss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_rsqrtss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2831,6 +4586,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_rsqrtss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_rsqrtss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -2839,6 +4602,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_rsqrtss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_rsqrtss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2847,6 +4618,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_rsqrtss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [4:1.00] +; SKX-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [4:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_rsqrtss: ; SKX: # %bb.0: ; SKX-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -2855,6 +4634,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_rsqrtss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [7:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2863,6 +4650,14 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_rsqrtss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:0.50] +; ZNVER1-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_rsqrtss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] @@ -2902,36 +4697,71 @@ define void @test_sfence() { ; SLM-NEXT: sfence # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sfence: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sfence # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sfence: ; SANDY: # %bb.0: ; SANDY-NEXT: sfence # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sfence: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sfence # sched: [2:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sfence: ; HASWELL: # %bb.0: ; HASWELL-NEXT: sfence # sched: [2:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sfence: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sfence # sched: [2:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sfence: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: sfence # sched: [2:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sfence: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sfence # sched: [2:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sfence: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: sfence # sched: [2:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sfence: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sfence # sched: [2:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sfence: ; SKX: # %bb.0: ; SKX-NEXT: sfence # sched: [2:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sfence: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: sfence # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sfence: ; BTVER2: # %bb.0: ; BTVER2-NEXT: sfence # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sfence: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: sfence # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sfence: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: sfence # sched: [1:0.50] @@ -2963,6 +4793,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_shufps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; SANDY-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_shufps: ; SANDY: # %bb.0: ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2970,6 +4807,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_shufps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; HASWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_shufps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2977,6 +4821,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_shufps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_shufps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2984,6 +4835,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_shufps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_shufps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2991,6 +4849,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_shufps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; SKX-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_shufps: ; SKX: # %bb.0: ; SKX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] @@ -2998,6 +4863,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_shufps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] +; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_shufps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] @@ -3005,6 +4877,13 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_shufps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_shufps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50] @@ -3041,6 +4920,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00] +; SANDY-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtps: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] @@ -3048,6 +4934,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] +; HASWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] @@ -3055,6 +4948,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] +; BROADWELL-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] @@ -3062,6 +4962,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [13:1.00] +; SKYLAKE-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [19:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] @@ -3069,6 +4976,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [12:1.00] +; SKX-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [18:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtps: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtps %xmm0, %xmm0 # sched: [12:1.00] @@ -3076,6 +4990,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00] +; BTVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [26:21.00] @@ -3083,6 +5004,13 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [20:1.00] +; ZNVER1-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsqrtps (%rdi), %xmm1 # sched: [27:1.00] @@ -3124,6 +5052,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00] +; SANDY-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtss: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] @@ -3132,6 +5068,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; HASWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] @@ -3140,6 +5084,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; BROADWELL-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] @@ -3148,6 +5100,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [13:1.00] +; SKYLAKE-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [13:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] @@ -3156,6 +5116,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [12:1.00] +; SKX-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] +; SKX-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [12:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtss: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [12:1.00] @@ -3164,6 +5132,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00] +; BTVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [5:1.00] @@ -3172,6 +5148,14 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovaps (%rdi), %xmm1 # sched: [8:0.50] @@ -3206,42 +5190,84 @@ define i32 @test_stmxcsr() { ; SLM-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_stmxcsr: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_stmxcsr: ; SANDY: # %bb.0: ; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_stmxcsr: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; HASWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_stmxcsr: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_stmxcsr: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_stmxcsr: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; BROADWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_stmxcsr: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_stmxcsr: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; SKYLAKE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_stmxcsr: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] +; SKX-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_stmxcsr: ; SKX: # %bb.0: ; SKX-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] ; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_stmxcsr: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; BTVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_stmxcsr: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; BTVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_stmxcsr: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?] +; ZNVER1-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_stmxcsr: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [100:?] @@ -3274,42 +5300,84 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: subps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subps: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subps: ; SKX: # %bb.0: ; SKX-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -3340,42 +5408,84 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; SLM-NEXT: subss (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subss: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subss: ; SKX: # %bb.0: ; SKX-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -3430,6 +5540,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ucomiss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ucomiss: ; SANDY: # %bb.0: ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -3444,6 +5568,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ucomiss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ucomiss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -3458,6 +5596,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ucomiss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ucomiss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -3472,6 +5624,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ucomiss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ucomiss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -3486,6 +5652,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ucomiss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ucomiss: ; SKX: # %bb.0: ; SKX-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00] @@ -3500,6 +5680,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ucomiss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ucomiss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -3514,6 +5708,20 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ucomiss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ucomiss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] @@ -3557,6 +5765,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpckhps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SANDY-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpckhps: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3564,6 +5779,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpckhps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; HASWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3571,6 +5793,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpckhps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpckhps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3578,6 +5807,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpckhps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpckhps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3585,6 +5821,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpckhps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpckhps: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -3592,6 +5835,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpckhps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -3599,6 +5849,13 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpckhps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpckhps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -3634,6 +5891,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpcklps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SANDY-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpcklps: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3641,6 +5905,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpcklps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3648,6 +5919,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpcklps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpcklps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3655,6 +5933,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpcklps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpcklps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3662,6 +5947,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpcklps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpcklps: ; SKX: # %bb.0: ; SKX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -3669,6 +5961,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpcklps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -3676,6 +5975,13 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpcklps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpcklps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -3712,42 +6018,84 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SLM-NEXT: xorps (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_xorps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_xorps: ; SANDY: # %bb.0: ; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_xorps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_xorps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_xorps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_xorps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_xorps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_xorps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_xorps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_xorps: ; SKX: # %bb.0: ; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_xorps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_xorps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_xorps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_xorps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -3796,6 +6144,14 @@ define <4 x float> @test_fnop() nounwind { ; SLM-NEXT: #NO_APP ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_fnop: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: #APP +; SANDY-SSE-NEXT: nop # sched: [1:?] +; SANDY-SSE-NEXT: #NO_APP +; SANDY-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_fnop: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP @@ -3804,6 +6160,14 @@ define <4 x float> @test_fnop() nounwind { ; SANDY-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_fnop: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: #APP +; HASWELL-SSE-NEXT: nop # sched: [1:0.25] +; HASWELL-SSE-NEXT: #NO_APP +; HASWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_fnop: ; HASWELL: # %bb.0: ; HASWELL-NEXT: #APP @@ -3812,6 +6176,14 @@ define <4 x float> @test_fnop() nounwind { ; HASWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_fnop: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: #APP +; BROADWELL-SSE-NEXT: nop # sched: [1:0.25] +; BROADWELL-SSE-NEXT: #NO_APP +; BROADWELL-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_fnop: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: #APP @@ -3820,6 +6192,14 @@ define <4 x float> @test_fnop() nounwind { ; BROADWELL-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_fnop: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: #APP +; SKYLAKE-SSE-NEXT: nop # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: #NO_APP +; SKYLAKE-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_fnop: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: #APP @@ -3828,6 +6208,14 @@ define <4 x float> @test_fnop() nounwind { ; SKYLAKE-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_fnop: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: #APP +; SKX-SSE-NEXT: nop # sched: [1:0.25] +; SKX-SSE-NEXT: #NO_APP +; SKX-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_fnop: ; SKX: # %bb.0: ; SKX-NEXT: #APP @@ -3836,6 +6224,14 @@ define <4 x float> @test_fnop() nounwind { ; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_fnop: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: #APP +; BTVER2-SSE-NEXT: nop # sched: [1:0.50] +; BTVER2-SSE-NEXT: #NO_APP +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_fnop: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -3844,6 +6240,14 @@ define <4 x float> @test_fnop() nounwind { ; BTVER2-NEXT: #NO_APP ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_fnop: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: #APP +; ZNVER1-SSE-NEXT: nop # sched: [1:?] +; ZNVER1-SSE-NEXT: #NO_APP +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_fnop: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.25] diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 0c30bada475..db6d7a5c198 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -1,15 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_addpd: @@ -30,42 +38,84 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addpd: ; SKX: # %bb.0: ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -96,42 +146,84 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addsd: ; SKX: # %bb.0: ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -165,6 +257,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -172,6 +271,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -179,6 +285,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -186,6 +299,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -193,6 +313,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andpd: ; SKX: # %bb.0: ; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -200,6 +327,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -207,6 +341,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -246,6 +387,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_andnotpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andnotpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -253,6 +401,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_andnotpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -260,6 +415,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_andnotpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_andnotpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -267,6 +429,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_andnotpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_andnotpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -274,6 +443,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_andnotpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_andnotpd: ; SKX: # %bb.0: ; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -281,6 +457,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_andnotpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -288,6 +471,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_andnotpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_andnotpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -329,36 +519,71 @@ define void @test_clflush(i8* %p){ ; SLM-NEXT: clflush (%rdi) # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_clflush: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_clflush: ; SANDY: # %bb.0: ; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_clflush: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_clflush: ; HASWELL: # %bb.0: ; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_clflush: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_clflush: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_clflush: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_clflush: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_clflush: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_clflush: ; SKX: # %bb.0: ; SKX-NEXT: clflush (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_clflush: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_clflush: ; BTVER2: # %bb.0: ; BTVER2-NEXT: clflush (%rdi) # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_clflush: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_clflush: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50] @@ -390,6 +615,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmppd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmppd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -397,6 +629,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmppd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -404,6 +643,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmppd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmppd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -411,6 +657,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmppd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmppd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] @@ -418,14 +671,27 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmppd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmppd: ; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33] +; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] +; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmppd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] @@ -433,6 +699,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmppd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmppd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -467,42 +740,84 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cmpsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmpsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cmpsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cmpsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cmpsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cmpsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cmpsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cmpsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cmpsd: ; SKX: # %bb.0: ; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cmpsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cmpsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cmpsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -562,6 +877,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_comisd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_comisd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -576,6 +905,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_comisd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_comisd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -590,6 +933,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_comisd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_comisd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -604,6 +961,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_comisd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_comisd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -618,6 +989,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_comisd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_comisd: ; SKX: # %bb.0: ; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -632,6 +1017,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_comisd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_comisd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -646,6 +1045,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_comisd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_comisd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -689,6 +1102,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtdq2pd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] @@ -696,6 +1116,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtdq2pd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] @@ -703,6 +1130,14 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtdq2pd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtdq2pd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] @@ -710,6 +1145,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtdq2pd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2pd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -717,6 +1159,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtdq2pd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtdq2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -724,6 +1173,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtdq2pd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -731,6 +1187,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtdq2pd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtdq2pd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00] @@ -769,6 +1232,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtdq2ps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -776,6 +1246,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtdq2ps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -783,6 +1260,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtdq2ps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtdq2ps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] @@ -790,6 +1274,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtdq2ps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtdq2ps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] @@ -797,6 +1288,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtdq2ps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtdq2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33] @@ -804,6 +1302,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtdq2ps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] @@ -811,6 +1316,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtdq2ps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtdq2ps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00] @@ -847,6 +1359,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtpd2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -854,6 +1373,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtpd2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -861,6 +1387,14 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtpd2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtpd2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -868,6 +1402,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtpd2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -875,6 +1416,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtpd2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -882,6 +1430,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtpd2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -889,6 +1444,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtpd2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtpd2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00] @@ -926,6 +1488,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtpd2ps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -933,6 +1502,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtpd2ps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -940,6 +1516,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtpd2ps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtpd2ps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] @@ -947,6 +1531,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtpd2ps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtpd2ps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] @@ -954,6 +1545,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtpd2ps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtpd2ps: ; SKX: # %bb.0: ; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] @@ -961,6 +1559,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtpd2ps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] @@ -968,6 +1573,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtpd2ps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtpd2ps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00] @@ -1005,6 +1617,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtps2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1012,6 +1631,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtps2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1019,6 +1645,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtps2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtps2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1026,6 +1659,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtps2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtps2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] @@ -1033,6 +1673,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtps2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtps2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -1040,6 +1687,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtps2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -1047,6 +1701,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtps2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtps2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00] @@ -1084,6 +1745,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtps2pd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1091,6 +1759,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtps2pd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1098,6 +1773,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtps2pd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtps2pd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] @@ -1105,6 +1787,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtps2pd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtps2pd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -1112,6 +1801,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtps2pd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtps2pd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] @@ -1119,6 +1815,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtps2pd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00] @@ -1126,6 +1829,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtps2pd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtps2pd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00] @@ -1163,6 +1873,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] @@ -1170,6 +1887,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] @@ -1177,6 +1901,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] @@ -1184,6 +1915,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1191,6 +1929,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1198,6 +1943,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00] @@ -1205,6 +1957,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] @@ -1243,6 +2002,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] @@ -1250,6 +2016,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] @@ -1257,6 +2030,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] @@ -1264,6 +2044,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1271,6 +2058,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1278,6 +2072,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00] @@ -1285,6 +2086,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] @@ -1327,6 +2135,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsd2ss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1335,6 +2151,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsd2ss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1343,6 +2167,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsd2ss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsd2ss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] @@ -1351,6 +2183,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsd2ss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsd2ss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1359,6 +2199,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsd2ss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] +; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsd2ss: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1367,6 +2215,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsd2ss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -1375,6 +2231,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsd2ss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsd2ss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] @@ -1411,6 +2275,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2sd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1418,6 +2289,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2sd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1425,6 +2303,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2sd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2sd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1432,6 +2317,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2sd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1439,6 +2331,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2sd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2sd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1446,6 +2345,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2sd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1453,6 +2359,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2sd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2sd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1488,6 +2401,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtsi2sdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1495,6 +2415,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtsi2sdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1502,6 +2429,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtsi2sdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtsi2sdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] @@ -1509,6 +2443,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtsi2sdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtsi2sdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1516,6 +2457,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtsi2sdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtsi2sdq: ; SKX: # %bb.0: ; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1523,6 +2471,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtsi2sdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00] @@ -1530,6 +2485,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtsi2sdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtsi2sdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] @@ -1571,6 +2533,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvtss2sd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] @@ -1579,6 +2549,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvtss2sd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] +; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] @@ -1587,6 +2565,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvtss2sd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvtss2sd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] @@ -1595,6 +2581,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvtss2sd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvtss2sd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1603,6 +2597,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvtss2sd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] +; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvtss2sd: ; SKX: # %bb.0: ; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] @@ -1611,6 +2613,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvtss2sd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] @@ -1619,6 +2629,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvtss2sd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvtss2sd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] @@ -1656,6 +2674,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttpd2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1663,6 +2688,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttpd2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1670,6 +2702,14 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttpd2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttpd2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] @@ -1677,6 +2717,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttpd2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttpd2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -1684,6 +2731,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttpd2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttpd2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] @@ -1691,6 +2745,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttpd2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] @@ -1698,6 +2759,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttpd2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttpd2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00] @@ -1736,6 +2804,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttps2dq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1743,6 +2818,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttps2dq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1750,6 +2832,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttps2dq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttps2dq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] @@ -1757,6 +2846,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttps2dq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttps2dq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] @@ -1764,6 +2860,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttps2dq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttps2dq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33] @@ -1771,6 +2874,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttps2dq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] @@ -1778,6 +2888,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttps2dq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00] +; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttps2dq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00] @@ -1813,6 +2930,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttsd2si: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] @@ -1820,6 +2944,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttsd2si: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] @@ -1827,6 +2958,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttsd2si: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttsd2si: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] @@ -1834,6 +2972,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttsd2si: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2si: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1841,6 +2986,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttsd2si: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] +; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] +; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttsd2si: ; SKX: # %bb.0: ; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] @@ -1848,6 +3000,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttsd2si: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00] @@ -1855,6 +3014,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttsd2si: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttsd2si: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] @@ -1890,6 +3056,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_cvttsd2siq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] +; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # %bb.0: ; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] @@ -1897,6 +3070,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_cvttsd2siq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] +; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] @@ -1904,6 +3084,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_cvttsd2siq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] +; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_cvttsd2siq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] @@ -1911,6 +3098,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_cvttsd2siq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] +; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_cvttsd2siq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1918,6 +3112,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_cvttsd2siq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] +; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] +; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_cvttsd2siq: ; SKX: # %bb.0: ; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] @@ -1925,6 +3126,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_cvttsd2siq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [8:1.00] +; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [3:1.00] +; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00] @@ -1932,6 +3140,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_cvttsd2siq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] +; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] +; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_cvttsd2siq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] @@ -1964,42 +3179,84 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] ; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divpd: ; SKX: # %bb.0: ; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -2030,42 +3287,84 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:34.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_divsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] ; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_divsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_divsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00] ; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_divsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_divsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_divsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_divsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_divsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00] +; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_divsd: ; SKX: # %bb.0: ; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00] ; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_divsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00] +; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_divsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] ; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_divsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00] +; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_divsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] @@ -2099,36 +3398,71 @@ define void @test_lfence() { ; SLM-NEXT: lfence # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_lfence: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: lfence # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_lfence: ; SANDY: # %bb.0: ; SANDY-NEXT: lfence # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_lfence: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: lfence # sched: [2:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_lfence: ; HASWELL: # %bb.0: ; HASWELL-NEXT: lfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_lfence: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_lfence: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: lfence # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_lfence: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_lfence: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: lfence # sched: [2:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_lfence: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: lfence # sched: [2:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_lfence: ; SKX: # %bb.0: ; SKX-NEXT: lfence # sched: [2:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_lfence: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: lfence # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_lfence: ; BTVER2: # %bb.0: ; BTVER2-NEXT: lfence # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_lfence: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_lfence: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: lfence # sched: [1:0.50] @@ -2160,36 +3494,71 @@ define void @test_mfence() { ; SLM-NEXT: mfence # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mfence: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mfence # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mfence: ; SANDY: # %bb.0: ; SANDY-NEXT: mfence # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mfence: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mfence # sched: [2:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mfence: ; HASWELL: # %bb.0: ; HASWELL-NEXT: mfence # sched: [2:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mfence: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mfence: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: mfence # sched: [2:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mfence: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mfence: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: mfence # sched: [3:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mfence: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mfence # sched: [3:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mfence: ; SKX: # %bb.0: ; SKX-NEXT: mfence # sched: [3:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mfence: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mfence # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mfence: ; BTVER2: # %bb.0: ; BTVER2-NEXT: mfence # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mfence: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mfence: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: mfence # sched: [1:0.50] @@ -2219,36 +3588,71 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maskmovdqu: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovdqu: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maskmovdqu: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maskmovdqu: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maskmovdqu: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maskmovdqu: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maskmovdqu: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maskmovdqu: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maskmovdqu: ; SKX: # %bb.0: ; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maskmovdqu: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maskmovdqu: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maskmovdqu: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:?] @@ -2277,42 +3681,84 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxpd: ; SKX: # %bb.0: ; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2344,42 +3790,84 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_maxsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_maxsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_maxsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_maxsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_maxsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_maxsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_maxsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_maxsd: ; SKX: # %bb.0: ; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_maxsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_maxsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_maxsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2411,42 +3899,84 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minpd: ; SKX: # %bb.0: ; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2478,42 +4008,84 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_minsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_minsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_minsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_minsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_minsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_minsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_minsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_minsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_minsd: ; SKX: # %bb.0: ; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_minsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_minsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_minsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_minsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -2548,6 +4120,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movapd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movapd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2555,6 +4134,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movapd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movapd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2562,6 +4148,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movapd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movapd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] @@ -2569,6 +4162,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movapd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movapd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2576,6 +4176,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movapd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movapd: ; SKX: # %bb.0: ; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] @@ -2583,6 +4190,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movapd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movapd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] @@ -2590,6 +4204,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movapd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movapd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50] @@ -2624,6 +4245,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2631,6 +4259,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2638,6 +4273,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] @@ -2645,6 +4287,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2652,6 +4301,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movdqa: ; SKX: # %bb.0: ; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] @@ -2659,6 +4315,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] @@ -2666,6 +4329,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50] @@ -2700,6 +4370,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movdqu: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movdqu: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2707,6 +4384,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movdqu: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2714,6 +4398,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movdqu: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movdqu: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] @@ -2721,6 +4412,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movdqu: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movdqu: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2728,6 +4426,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movdqu: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movdqu: ; SKX: # %bb.0: ; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] @@ -2735,6 +4440,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movdqu: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] @@ -2742,6 +4454,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movdqu: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movdqu: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50] @@ -2785,6 +4504,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2795,6 +4524,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] +; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2805,6 +4544,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2815,6 +4564,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] @@ -2825,16 +4584,36 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movd: ; SKX: # %bb.0: -; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vmovd %edi, %xmm2 # sched: [1:1.00] -; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] +; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: vmovd %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] +; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] @@ -2845,6 +4624,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] @@ -2896,6 +4685,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movd_64: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movd_64: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2906,6 +4705,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movd_64: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] +; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2916,6 +4725,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movd_64: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movd_64: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2926,6 +4745,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movd_64: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movd_64: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] @@ -2936,16 +4765,36 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movd_64: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movd_64: ; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vmovq %rdi, %xmm2 # sched: [1:1.00] -; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] +; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] +; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] +; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: vmovq %xmm2, (%rsi) # sched: [1:1.00] +; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movd_64: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] @@ -2956,6 +4805,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movd_64: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movd_64: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] @@ -2998,6 +4857,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movhpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movhpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] @@ -3005,6 +4871,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movhpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3012,6 +4885,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movhpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movhpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3019,6 +4899,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movhpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movhpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3026,6 +4913,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movhpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movhpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3033,6 +4927,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movhpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] @@ -3040,6 +4941,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movhpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movhpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] @@ -3077,6 +4985,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movlpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movlpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] @@ -3084,6 +4999,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movlpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3091,6 +5013,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movlpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movlpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3098,6 +5027,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movlpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movlpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3105,6 +5041,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movlpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movlpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3112,6 +5055,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movlpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] @@ -3119,6 +5069,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movlpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movlpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] @@ -3152,36 +5109,71 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movmskpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movmskpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movmskpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movmskpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movmskpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movmskpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movmskpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movmskpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movmskpd: ; SKX: # %bb.0: ; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movmskpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movmskpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movmskpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00] @@ -3212,42 +5204,84 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntdqa: ; SKX: # %bb.0: ; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] @@ -3277,42 +5311,84 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntpd: ; SKX: # %bb.0: ; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] @@ -3345,6 +5421,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movq_mem: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movq_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] @@ -3352,6 +5435,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movq_mem: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3359,6 +5449,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movq_mem: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movq_mem: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3366,6 +5463,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movq_mem: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movq_mem: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3373,6 +5477,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movq_mem: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movq_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] @@ -3380,6 +5491,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movq_mem: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] @@ -3387,6 +5505,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movq_mem: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movq_mem: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] @@ -3422,42 +5547,84 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movq_reg: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movq_reg: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movq_reg: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movq_reg: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movq_reg: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movq_reg: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movq_reg: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movq_reg: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movq_reg: ; SKX: # %bb.0: ; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movq_reg: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] ; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movq_reg: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movq_reg: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] @@ -3490,6 +5657,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movsd_mem: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] @@ -3497,6 +5671,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movsd_mem: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3504,6 +5685,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movsd_mem: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movsd_mem: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3511,6 +5699,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movsd_mem: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movsd_mem: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3518,6 +5713,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movsd_mem: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] +; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movsd_mem: ; SKX: # %bb.0: ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] @@ -3525,6 +5727,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movsd_mem: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] +; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] @@ -3532,6 +5741,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movsd_mem: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movsd_mem: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] @@ -3567,36 +5783,78 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movsd_reg: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsd_reg: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movsd_reg: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movsd_reg: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movsd_reg: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movsd_reg: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movsd_reg: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movsd_reg: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movsd_reg: ; SKX: # %bb.0: ; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movsd_reg: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movsd_reg: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movsd_reg: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] @@ -3627,6 +5885,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movupd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movupd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3634,6 +5899,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movupd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movupd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3641,6 +5913,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movupd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movupd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] @@ -3648,6 +5927,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movupd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movupd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3655,6 +5941,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movupd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movupd: ; SKX: # %bb.0: ; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] @@ -3662,6 +5955,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movupd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movupd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] @@ -3669,6 +5969,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movupd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movupd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50] @@ -3700,42 +6007,84 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulpd: ; SKX: # %bb.0: ; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00] +; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -3766,42 +6115,84 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mulsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mulsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50] +; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mulsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] +; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mulsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] ; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mulsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mulsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mulsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mulsd: ; SKX: # %bb.0: ; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mulsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00] +; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mulsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] +; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mulsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] @@ -3835,6 +6226,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_orpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_orpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3842,6 +6240,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_orpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_orpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3849,6 +6254,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_orpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_orpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -3856,6 +6268,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_orpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_orpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3863,6 +6282,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_orpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_orpd: ; SKX: # %bb.0: ; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -3870,6 +6296,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_orpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_orpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -3877,6 +6310,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_orpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_orpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -3917,42 +6357,84 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packssdw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packssdw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packssdw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packssdw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packssdw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packssdw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packssdw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packssdw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packssdw: ; SKX: # %bb.0: ; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packssdw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packssdw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packssdw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -3989,42 +6471,84 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packsswb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packsswb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packsswb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packsswb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packsswb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packsswb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packsswb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packsswb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packsswb: ; SKX: # %bb.0: ; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packsswb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packsswb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packsswb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4061,42 +6585,84 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packuswb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packuswb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packuswb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packuswb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packuswb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packuswb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packuswb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packuswb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packuswb: ; SKX: # %bb.0: ; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packuswb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packuswb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packuswb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4133,42 +6699,84 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4203,42 +6811,84 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddd: ; SKX: # %bb.0: ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4269,42 +6919,84 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddq: ; SKX: # %bb.0: ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4339,42 +7031,84 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddsb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4410,42 +7144,84 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddsw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4481,42 +7257,84 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddusb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddusb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddusb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddusb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddusb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddusb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddusb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddusb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddusb: ; SKX: # %bb.0: ; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddusb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddusb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddusb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4552,42 +7370,84 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddusw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddusw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddusw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddusw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddusw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddusw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddusw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddusw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddusw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddusw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddusw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddusw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4623,42 +7483,84 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_paddw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_paddw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_paddw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_paddw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_paddw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_paddw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_paddw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_paddw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_paddw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_paddw: ; SKX: # %bb.0: ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_paddw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_paddw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_paddw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4692,6 +7594,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pand: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pand: ; SANDY: # %bb.0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4699,6 +7608,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pand: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pand: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4706,6 +7622,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pand: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pand: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4713,6 +7636,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pand: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pand: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4720,6 +7650,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pand: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pand: ; SKX: # %bb.0: ; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4727,6 +7664,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pand: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pand: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4734,6 +7678,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pand: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4775,6 +7726,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pandn: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pandn: ; SANDY: # %bb.0: ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4782,6 +7742,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pandn: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pandn: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4789,6 +7758,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pandn: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pandn: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4796,6 +7774,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pandn: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pandn: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4803,6 +7790,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pandn: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pandn: ; SKX: # %bb.0: ; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -4810,6 +7806,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pandn: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pandn: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -4817,6 +7822,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pandn: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4855,42 +7869,84 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pavgb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pavgb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pavgb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pavgb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pavgb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pavgb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pavgb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pavgb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pavgb: ; SKX: # %bb.0: ; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pavgb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pavgb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pavgb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -4935,42 +7991,84 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pavgw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pavgw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pavgw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pavgw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pavgw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pavgw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pavgw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pavgw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pavgw: ; SKX: # %bb.0: ; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pavgw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pavgw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pavgw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5016,6 +8114,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5023,6 +8128,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5030,6 +8142,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5037,6 +8156,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5044,14 +8170,27 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5059,6 +8198,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5097,6 +8243,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5104,6 +8257,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5111,6 +8271,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5118,6 +8285,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5125,14 +8299,27 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5140,6 +8327,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5178,6 +8372,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5185,6 +8386,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5192,6 +8400,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5199,6 +8414,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5206,14 +8428,27 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5221,6 +8456,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5260,6 +8502,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5267,6 +8517,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5274,6 +8532,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5281,6 +8547,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5288,14 +8562,29 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtb: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5303,6 +8592,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5342,6 +8639,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5349,6 +8654,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5356,6 +8669,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5363,6 +8684,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5370,14 +8699,29 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtd: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5385,6 +8729,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5424,6 +8776,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5431,6 +8791,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5438,6 +8806,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5445,6 +8821,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5452,14 +8836,29 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtw: ; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00] -; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00] -; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00] -; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] +; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] @@ -5467,6 +8866,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] @@ -5500,42 +8907,84 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; SLM-NEXT: # kill: def $ax killed $ax killed $eax ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: # kill: def $ax killed $ax killed $eax ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrw: ; SKX: # %bb.0: ; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: # kill: def $ax killed $ax killed $eax ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00] @@ -5568,42 +9017,84 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrw: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -5634,42 +9125,84 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaddwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaddwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaddwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaddwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaddwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaddwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaddwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaddwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaddwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaddwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaddwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -5706,42 +9239,84 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsw: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5777,42 +9352,84 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxub: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxub: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxub: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxub: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxub: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxub: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxub: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxub: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxub: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxub: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxub: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxub: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxub: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5848,42 +9465,84 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsw: ; SKX: # %bb.0: ; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5919,42 +9578,84 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminub: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminub: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminub: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminub: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminub: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminub: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminub: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminub: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminub: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminub: ; SKX: # %bb.0: ; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminub: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminub: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminub: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminub: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -5985,36 +9686,71 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovmskb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovmskb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovmskb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovmskb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovmskb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovmskb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovmskb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovmskb: ; SKX: # %bb.0: ; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovmskb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovmskb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovmskb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] @@ -6043,42 +9779,84 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulhuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulhuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulhuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulhuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulhuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulhuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulhuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulhuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulhuw: ; SKX: # %bb.0: ; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulhuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulhuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulhuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6110,42 +9888,84 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulhw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulhw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulhw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulhw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulhw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulhw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulhw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulhw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulhw: ; SKX: # %bb.0: ; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulhw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulhw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulhw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6177,42 +9997,84 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmullw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmullw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmullw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmullw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmullw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmullw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmullw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmullw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmullw: ; SKX: # %bb.0: ; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmullw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmullw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6243,42 +10105,84 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmuludq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmuludq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmuludq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmuludq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmuludq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmuludq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmuludq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmuludq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmuludq: ; SKX: # %bb.0: ; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmuludq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmuludq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmuludq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -6314,6 +10218,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_por: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_por: ; SANDY: # %bb.0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6321,6 +10232,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_por: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_por: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6328,6 +10246,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_por: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_por: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6335,6 +10260,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_por: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_por: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6342,6 +10274,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_por: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_por: ; SKX: # %bb.0: ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -6349,6 +10288,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_por: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_por: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6356,6 +10302,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_por: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_por: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -6392,42 +10345,84 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psadbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psadbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psadbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psadbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psadbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psadbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psadbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psadbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] +; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psadbw: ; SKX: # %bb.0: ; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psadbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psadbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psadbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -6465,6 +10460,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshufd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshufd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] @@ -6472,6 +10474,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshufd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6479,6 +10488,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshufd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshufd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6486,6 +10502,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshufd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshufd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6493,6 +10516,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshufd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] +; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshufd: ; SKX: # %bb.0: ; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] @@ -6500,6 +10530,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshufd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] @@ -6507,6 +10544,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshufd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshufd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] @@ -6544,6 +10588,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshufhw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshufhw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] @@ -6551,6 +10602,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshufhw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6558,6 +10616,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshufhw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshufhw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6565,6 +10630,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshufhw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshufhw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6572,6 +10644,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshufhw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] +; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshufhw: ; SKX: # %bb.0: ; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] @@ -6579,6 +10658,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshufhw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] @@ -6586,6 +10672,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshufhw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshufhw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] @@ -6623,6 +10716,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshuflw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshuflw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] @@ -6630,6 +10730,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshuflw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6637,6 +10744,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshuflw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshuflw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6644,6 +10758,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshuflw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshuflw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6651,6 +10772,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshuflw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] +; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshuflw: ; SKX: # %bb.0: ; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] @@ -6658,6 +10786,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshuflw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] @@ -6665,6 +10800,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshuflw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshuflw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] @@ -6700,6 +10842,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pslld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pslld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6707,6 +10856,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pslld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pslld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6714,6 +10870,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pslld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pslld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6721,6 +10884,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pslld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pslld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6728,6 +10898,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pslld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pslld: ; SKX: # %bb.0: ; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6735,6 +10912,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pslld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pslld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6742,6 +10926,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pslld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pslld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6779,36 +10970,71 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pslldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pslldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pslldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pslldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pslldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pslldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pslldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pslldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pslldq: ; SKX: # %bb.0: ; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pslldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pslldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pslldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] @@ -6839,6 +11065,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psllq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psllq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6846,6 +11079,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psllq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psllq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6853,6 +11093,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psllq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psllq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6860,6 +11107,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psllq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psllq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6867,6 +11121,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psllq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psllq: ; SKX: # %bb.0: ; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6874,6 +11135,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psllq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psllq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6881,6 +11149,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psllq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psllq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6918,6 +11193,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psllw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psllw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6925,6 +11207,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psllw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psllw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6932,6 +11221,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psllw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psllw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6939,6 +11235,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psllw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psllw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6946,6 +11249,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psllw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psllw: ; SKX: # %bb.0: ; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -6953,6 +11263,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psllw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psllw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -6960,6 +11277,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psllw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psllw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -6997,6 +11321,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrad: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrad: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7004,6 +11335,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrad: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrad: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7011,6 +11349,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrad: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrad: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7018,6 +11363,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrad: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrad: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7025,6 +11377,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrad: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrad: ; SKX: # %bb.0: ; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7032,6 +11391,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrad: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrad: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7039,6 +11405,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrad: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrad: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7076,6 +11449,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psraw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psraw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7083,6 +11463,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psraw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psraw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7090,6 +11477,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psraw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psraw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7097,6 +11491,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psraw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psraw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7104,6 +11505,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psraw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psraw: ; SKX: # %bb.0: ; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7111,6 +11519,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psraw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psraw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7118,6 +11533,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psraw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psraw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7155,6 +11577,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7162,6 +11591,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7169,6 +11605,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7176,6 +11619,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7183,6 +11633,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrld: ; SKX: # %bb.0: ; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7190,6 +11647,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7197,6 +11661,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7234,36 +11705,71 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrldq: ; SKX: # %bb.0: ; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] @@ -7294,6 +11800,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrlq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrlq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7301,6 +11814,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrlq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7308,6 +11828,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrlq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrlq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7315,6 +11842,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrlq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrlq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7322,6 +11856,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrlq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrlq: ; SKX: # %bb.0: ; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7329,6 +11870,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrlq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7336,6 +11884,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrlq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrlq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7373,6 +11928,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psrlw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psrlw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7380,6 +11942,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psrlw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7387,6 +11956,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psrlw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psrlw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7394,6 +11970,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psrlw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psrlw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7401,6 +11984,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psrlw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psrlw: ; SKX: # %bb.0: ; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] @@ -7408,6 +11998,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psrlw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -7415,6 +12012,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psrlw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psrlw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -7453,42 +12057,84 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7523,42 +12169,84 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubd: ; SKX: # %bb.0: ; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7589,42 +12277,84 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubq: ; SKX: # %bb.0: ; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7659,42 +12389,84 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubsb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7730,42 +12502,84 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubsw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7801,42 +12615,84 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubusb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubusb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubusb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubusb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubusb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubusb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubusb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubusb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubusb: ; SKX: # %bb.0: ; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubusb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubusb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubusb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7872,42 +12728,84 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubusw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubusw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubusw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubusw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubusw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubusw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubusw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubusw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubusw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubusw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubusw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubusw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubusw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -7943,42 +12841,84 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psubw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psubw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psubw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psubw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psubw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psubw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psubw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psubw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psubw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psubw: ; SKX: # %bb.0: ; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psubw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psubw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psubw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -8013,42 +12953,84 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhbw: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] ; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] ; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] @@ -8084,6 +13066,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -8091,6 +13080,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8098,6 +13094,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8105,6 +13108,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8112,6 +13122,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] @@ -8119,6 +13136,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] @@ -8126,6 +13150,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] @@ -8161,6 +13192,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhqdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -8168,6 +13206,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhqdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8175,6 +13220,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhqdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhqdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8182,6 +13234,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhqdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhqdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8189,6 +13248,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhqdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhqdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -8196,6 +13262,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhqdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -8203,6 +13276,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhqdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhqdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] @@ -8239,42 +13319,84 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckhwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckhwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckhwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckhwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckhwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckhwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckhwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckhwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckhwd: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckhwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckhwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckhwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] @@ -8309,42 +13431,84 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklbw: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] ; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] ; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] @@ -8380,6 +13544,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpckldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpckldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -8387,6 +13558,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpckldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8394,6 +13572,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpckldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpckldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8401,6 +13586,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpckldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpckldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8408,6 +13600,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpckldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpckldq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] @@ -8415,6 +13614,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpckldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] @@ -8422,6 +13628,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpckldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpckldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] @@ -8457,6 +13670,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklqdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -8464,6 +13684,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklqdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8471,6 +13698,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklqdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklqdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8478,6 +13712,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklqdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklqdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8485,6 +13726,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklqdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklqdq: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -8492,6 +13740,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklqdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -8499,6 +13754,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklqdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklqdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] @@ -8535,42 +13797,84 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_punpcklwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_punpcklwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_punpcklwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_punpcklwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_punpcklwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_punpcklwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_punpcklwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_punpcklwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_punpcklwd: ; SKX: # %bb.0: ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] ; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_punpcklwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] ; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_punpcklwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] +; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_punpcklwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] @@ -8604,6 +13908,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pxor: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pxor: ; SANDY: # %bb.0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8611,6 +13922,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pxor: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pxor: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8618,6 +13936,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pxor: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pxor: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8625,6 +13950,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pxor: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pxor: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8632,6 +13964,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pxor: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pxor: ; SKX: # %bb.0: ; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -8639,6 +13978,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pxor: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pxor: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -8646,6 +13992,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pxor: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -8681,6 +14034,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_shufpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_shufpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8688,6 +14048,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_shufpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8695,6 +14062,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_shufpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_shufpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8702,6 +14076,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_shufpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_shufpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8709,6 +14090,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_shufpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_shufpd: ; SKX: # %bb.0: ; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] @@ -8716,6 +14104,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_shufpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] @@ -8723,6 +14118,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_shufpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_shufpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] @@ -8759,6 +14161,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00] +; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] @@ -8766,6 +14175,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] @@ -8773,6 +14189,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] @@ -8780,6 +14203,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] @@ -8787,6 +14217,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00] +; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtpd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00] @@ -8794,6 +14231,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] +; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00] @@ -8801,6 +14245,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00] +; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00] @@ -8842,6 +14293,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_sqrtsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00] +; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8850,6 +14309,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_sqrtsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8858,6 +14325,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_sqrtsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_sqrtsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] @@ -8866,6 +14341,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_sqrtsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_sqrtsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] @@ -8874,6 +14357,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_sqrtsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00] +; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_sqrtsd: ; SKX: # %bb.0: ; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00] @@ -8882,6 +14373,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_sqrtsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00] +; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [26:21.00] +; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [26:21.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] @@ -8890,6 +14389,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_sqrtsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_sqrtsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] @@ -8924,42 +14431,84 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subpd: ; SKX: # %bb.0: ; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -8990,42 +14539,84 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_subsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_subsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_subsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_subsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_subsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_subsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_subsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_subsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_subsd: ; SKX: # %bb.0: ; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_subsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_subsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_subsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_subsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -9080,6 +14671,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ucomisd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] +; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ucomisd: ; SANDY: # %bb.0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9094,6 +14699,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ucomisd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9108,6 +14727,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ucomisd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ucomisd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9122,6 +14755,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ucomisd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ucomisd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9136,6 +14783,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ucomisd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] +; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ucomisd: ; SKX: # %bb.0: ; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] @@ -9150,6 +14811,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ucomisd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9164,6 +14839,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ucomisd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ucomisd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] @@ -9207,6 +14896,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpckhpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpckhpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9214,6 +14910,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpckhpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9221,6 +14924,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpckhpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpckhpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9228,6 +14938,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpckhpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpckhpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9235,6 +14952,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpckhpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpckhpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] @@ -9242,6 +14966,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpckhpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -9249,6 +14980,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpckhpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpckhpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] @@ -9290,6 +15028,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_unpcklpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpcklpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9297,6 +15044,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_unpcklpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9304,6 +15060,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_unpcklpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_unpcklpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9311,6 +15076,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_unpcklpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9318,6 +15092,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_unpcklpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33] +; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_unpcklpd: ; SKX: # %bb.0: ; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] @@ -9325,6 +15108,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_unpcklpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -9332,6 +15124,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_unpcklpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_unpcklpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] @@ -9367,6 +15168,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_xorpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_xorpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9374,6 +15182,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_xorpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9381,6 +15196,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_xorpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_xorpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -9388,6 +15210,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_xorpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_xorpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -9395,6 +15224,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_xorpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_xorpd: ; SKX: # %bb.0: ; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] @@ -9402,6 +15238,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_xorpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -9409,6 +15252,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_xorpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_xorpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll index dc23cc6968d..50bdf203c74 100644 --- a/llvm/test/CodeGen/X86/sse3-schedule.ll +++ b/llvm/test/CodeGen/X86/sse3-schedule.ll @@ -1,15 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_addsubpd: @@ -30,42 +38,84 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: addsubpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addsubpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsubpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addsubpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addsubpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addsubpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addsubpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addsubpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addsubpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addsubpd: ; SKX: # %bb.0: ; SKX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addsubpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addsubpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addsubpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -97,42 +147,84 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: addsubps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_addsubps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsubps: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_addsubps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_addsubps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_addsubps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_addsubps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_addsubps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_addsubps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_addsubps: ; SKX: # %bb.0: ; SKX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_addsubps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_addsubps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [10:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_addsubps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] @@ -164,42 +256,84 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SLM-NEXT: haddpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_haddpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] +; SANDY-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_haddpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_haddpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] +; HASWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_haddpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] +; BROADWELL-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_haddpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; BROADWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_haddpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] +; SKYLAKE-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_haddpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_haddpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [6:2.00] +; SKX-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [12:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_haddpd: ; SKX: # %bb.0: ; SKX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_haddpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_haddpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_haddpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -231,42 +365,84 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SLM-NEXT: haddps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_haddps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] +; SANDY-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_haddps: ; SANDY: # %bb.0: ; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_haddps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] +; HASWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_haddps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_haddps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] +; BROADWELL-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_haddps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; BROADWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_haddps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] +; SKYLAKE-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_haddps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_haddps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [6:2.00] +; SKX-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [12:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_haddps: ; SKX: # %bb.0: ; SKX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_haddps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_haddps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_haddps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_haddps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -298,42 +474,84 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SLM-NEXT: hsubpd (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_hsubpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] +; SANDY-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_hsubpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_hsubpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] +; HASWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_hsubpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] +; BROADWELL-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_hsubpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; BROADWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_hsubpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] +; SKYLAKE-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_hsubpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_hsubpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [6:2.00] +; SKX-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [12:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_hsubpd: ; SKX: # %bb.0: ; SKX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_hsubpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_hsubpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_hsubpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -365,42 +583,84 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SLM-NEXT: hsubps (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_hsubps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] +; SANDY-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_hsubps: ; SANDY: # %bb.0: ; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_hsubps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] +; HASWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_hsubps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] +; BROADWELL-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_hsubps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] ; BROADWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_hsubps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] +; SKYLAKE-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_hsubps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKYLAKE-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_hsubps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [6:2.00] +; SKX-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [12:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_hsubps: ; SKX: # %bb.0: ; SKX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [6:2.00] ; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_hsubps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_hsubps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_hsubps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -431,36 +691,71 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; SLM-NEXT: lddqu (%rdi), %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_lddqu: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_lddqu: ; SANDY: # %bb.0: ; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_lddqu: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_lddqu: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_lddqu: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_lddqu: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_lddqu: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_lddqu: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_lddqu: ; SKX: # %bb.0: ; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_lddqu: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_lddqu: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_lddqu: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50] @@ -492,6 +787,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; SLM-NEXT: monitor # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_monitor: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; SANDY-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33] +; SANDY-SSE-NEXT: monitor # sched: [100:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_monitor: ; SANDY: # %bb.0: ; SANDY-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -499,6 +801,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; SANDY-NEXT: monitor # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_monitor: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; HASWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; HASWELL-SSE-NEXT: monitor # sched: [100:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_monitor: ; HASWELL: # %bb.0: ; HASWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -506,6 +815,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; HASWELL-NEXT: monitor # sched: [100:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_monitor: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; BROADWELL-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: monitor # sched: [100:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_monitor: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -513,6 +829,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; BROADWELL-NEXT: monitor # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_monitor: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: monitor # sched: [100:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_monitor: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -520,6 +843,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; SKYLAKE-NEXT: monitor # sched: [100:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_monitor: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; SKX-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; SKX-SSE-NEXT: monitor # sched: [100:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_monitor: ; SKX: # %bb.0: ; SKX-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -527,6 +857,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; SKX-NEXT: monitor # sched: [100:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_monitor: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-SSE-NEXT: monitor # sched: [100:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_monitor: ; BTVER2: # %bb.0: ; BTVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50] @@ -534,6 +871,13 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) { ; BTVER2-NEXT: monitor # sched: [100:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_monitor: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl %esi, %ecx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: monitor # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_monitor: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25] @@ -569,6 +913,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movddup: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; SANDY-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] +; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movddup: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -576,6 +927,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movddup: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; HASWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] +; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movddup: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -583,6 +941,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movddup: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] +; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movddup: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -590,6 +955,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movddup: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] +; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movddup: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -597,6 +969,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movddup: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; SKX-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [5:0.50] +; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movddup: ; SKX: # %bb.0: ; SKX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] @@ -604,6 +983,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movddup: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] +; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:1.00] +; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movddup: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:1.00] @@ -611,6 +997,13 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movddup: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movddup: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50] @@ -648,6 +1041,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movshdup: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; SANDY-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movshdup: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -655,6 +1055,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movshdup: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; HASWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -662,6 +1069,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movshdup: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [5:0.50] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movshdup: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -669,6 +1083,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movshdup: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movshdup: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -676,6 +1097,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movshdup: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; SKX-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movshdup: ; SKX: # %bb.0: ; SKX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] @@ -683,6 +1111,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movshdup: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] +; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:1.00] @@ -690,6 +1125,13 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movshdup: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movshdup: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50] @@ -727,6 +1169,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movsldup: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; SANDY-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsldup: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -734,6 +1183,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movsldup: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; HASWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -741,6 +1197,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movsldup: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; BROADWELL-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [5:0.50] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movsldup: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -748,6 +1211,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movsldup: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movsldup: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -755,6 +1225,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movsldup: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; SKX-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movsldup: ; SKX: # %bb.0: ; SKX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] @@ -762,6 +1239,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movsldup: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50] +; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:1.00] @@ -769,6 +1253,13 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movsldup: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [100:?] +; ZNVER1-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [100:?] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movsldup: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50] @@ -804,6 +1295,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; SLM-NEXT: mwait # sched: [100:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mwait: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33] +; SANDY-SSE-NEXT: movl %esi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: mwait # sched: [100:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mwait: ; SANDY: # %bb.0: ; SANDY-NEXT: movl %edi, %ecx # sched: [1:0.33] @@ -811,6 +1309,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; SANDY-NEXT: mwait # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mwait: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: mwait # sched: [20:2.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mwait: ; HASWELL: # %bb.0: ; HASWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] @@ -818,6 +1323,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; HASWELL-NEXT: mwait # sched: [20:2.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mwait: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: mwait # sched: [100:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mwait: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: movl %edi, %ecx # sched: [1:0.25] @@ -825,6 +1337,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; BROADWELL-NEXT: mwait # sched: [100:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mwait: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: mwait # sched: [20:2.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mwait: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: movl %edi, %ecx # sched: [1:0.25] @@ -832,6 +1351,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; SKYLAKE-NEXT: mwait # sched: [20:2.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mwait: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] +; SKX-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: mwait # sched: [20:2.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mwait: ; SKX: # %bb.0: ; SKX-NEXT: movl %edi, %ecx # sched: [1:0.25] @@ -839,6 +1365,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; SKX-NEXT: mwait # sched: [20:2.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mwait: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: mwait # sched: [100:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mwait: ; BTVER2: # %bb.0: ; BTVER2-NEXT: movl %edi, %ecx # sched: [1:0.50] @@ -846,6 +1379,13 @@ define void @test_mwait(i32 %a0, i32 %a1) { ; BTVER2-NEXT: mwait # sched: [100:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mwait: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movl %edi, %ecx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl %esi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: mwait # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mwait: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25] diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 88cb90fdb43..4bfcebf3cb9 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -1,14 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: @@ -25,6 +33,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -32,6 +47,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -39,6 +61,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -46,6 +75,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -53,14 +89,27 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50] +; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00] +; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -68,6 +117,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -96,6 +152,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendps: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -103,6 +166,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -110,6 +180,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -117,6 +194,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -124,6 +208,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendps: ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -131,6 +222,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -138,6 +236,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -170,42 +275,105 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendvpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendvpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendvpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendvpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendvpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendvpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendvpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendvpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendvpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [3:0.33] +; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [11:0.67] +; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendvpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -237,42 +405,105 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendvps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvps: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendvps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendvps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendvps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendvps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendvps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendvps: ; SKX: # %bb.0: ; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendvps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendvps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [3:0.33] +; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [11:0.67] +; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendvps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -298,42 +529,84 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_dppd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_dppd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_dppd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_dppd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_dppd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_dppd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_dppd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_dppd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_dppd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_dppd: ; SKX: # %bb.0: ; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_dppd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00] +; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_dppd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00] ; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_dppd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_dppd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -359,42 +632,84 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_dpps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] +; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_dpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_dpps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] +; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_dpps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_dpps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] +; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_dpps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] ; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_dpps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50] +; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50] ; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_dpps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33] +; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_dpps: ; SKX: # %bb.0: ; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] ; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_dpps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00] +; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_dpps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00] ; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_dpps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_dpps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -420,42 +735,84 @@ define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { ; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_extractps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_extractps: ; SANDY: # %bb.0: ; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_extractps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_extractps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_extractps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_extractps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_extractps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_extractps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_extractps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_extractps: ; SKX: # %bb.0: ; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_extractps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_extractps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_extractps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_extractps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00] @@ -482,42 +839,84 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_insertps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_insertps: ; SANDY: # %bb.0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_insertps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_insertps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_insertps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_insertps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_insertps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_insertps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_insertps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_insertps: ; SKX: # %bb.0: ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_insertps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] +; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_insertps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_insertps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_insertps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] @@ -541,36 +940,71 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntdqa: ; SKX: # %bb.0: ; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50] @@ -593,42 +1027,84 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mpsadbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mpsadbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] +; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mpsadbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mpsadbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mpsadbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] +; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mpsadbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] +; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mpsadbw: ; SKX: # %bb.0: ; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mpsadbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00] +; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mpsadbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mpsadbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mpsadbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -655,42 +1131,84 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packusdw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packusdw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packusdw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packusdw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packusdw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packusdw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packusdw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packusdw: ; SKX: # %bb.0: ; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packusdw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packusdw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packusdw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -723,42 +1241,105 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pblendvb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pblendvb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pblendvb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pblendvb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pblendvb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pblendvb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pblendvb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pblendvb: ; SKX: # %bb.0: ; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pblendvb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pblendvb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pblendvb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -786,6 +1367,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pblendw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pblendw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] @@ -793,6 +1381,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pblendw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -800,6 +1395,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pblendw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pblendw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -807,6 +1409,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pblendw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -814,6 +1423,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pblendw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pblendw: ; SKX: # %bb.0: ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -821,6 +1437,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pblendw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] @@ -828,6 +1451,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pblendw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] +; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pblendw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] @@ -854,42 +1484,84 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqq: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -916,42 +1588,84 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrb: ; SKX: # %bb.0: ; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00] @@ -979,6 +1693,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -986,6 +1707,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -993,6 +1721,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -1000,6 +1735,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] @@ -1007,6 +1749,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrd: ; SKX: # %bb.0: ; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] @@ -1014,6 +1763,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -1021,6 +1777,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] @@ -1047,42 +1810,84 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrq: ; SKX: # %bb.0: ; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00] @@ -1107,42 +1912,84 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrw: ; SKX: # %bb.0: ; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00] @@ -1168,42 +2015,84 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phminposuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phminposuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phminposuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phminposuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phminposuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phminposuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phminposuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phminposuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phminposuw: ; SKX: # %bb.0: ; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phminposuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phminposuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phminposuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] @@ -1229,42 +2118,84 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrb: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -1289,42 +2220,84 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrd: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -1351,6 +2324,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] @@ -1358,6 +2338,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1365,6 +2352,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1372,6 +2366,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1379,6 +2380,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrq: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1386,6 +2394,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] @@ -1393,6 +2408,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50] @@ -1419,42 +2441,84 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsb: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1480,42 +2544,84 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsd: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1541,42 +2647,84 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxud: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxud: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxud: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxud: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxud: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxud: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxud: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxud: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxud: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxud: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxud: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1602,42 +2750,84 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxuw: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1663,42 +2853,84 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsb: ; SKX: # %bb.0: ; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1724,42 +2956,84 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsd: ; SKX: # %bb.0: ; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1785,42 +3059,84 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminud: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminud: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminud: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminud: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminud: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminud: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminud: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminud: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminud: ; SKX: # %bb.0: ; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminud: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminud: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminud: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminud: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1846,42 +3162,84 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminuw: ; SKX: # %bb.0: ; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1910,6 +3268,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] @@ -1917,6 +3282,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1924,6 +3296,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1931,6 +3310,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1938,6 +3324,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbw: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1945,6 +3338,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] @@ -1952,6 +3352,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50] @@ -1982,6 +3389,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] @@ -1989,6 +3403,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -1996,6 +3417,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2003,6 +3431,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2010,6 +3445,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2017,6 +3459,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] @@ -2024,6 +3473,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50] @@ -2054,6 +3510,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] @@ -2061,6 +3524,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2068,6 +3538,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2075,6 +3552,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2082,6 +3566,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2089,6 +3580,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] @@ -2096,6 +3594,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50] @@ -2126,6 +3631,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] @@ -2133,6 +3645,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2140,6 +3659,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2147,6 +3673,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2154,6 +3687,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxdq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2161,6 +3701,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] @@ -2168,6 +3715,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50] @@ -2198,6 +3752,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] @@ -2205,6 +3766,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2212,6 +3780,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2219,6 +3794,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2226,6 +3808,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2233,6 +3822,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] @@ -2240,6 +3836,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50] @@ -2270,6 +3873,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxwq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] @@ -2277,6 +3887,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxwq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2284,6 +3901,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxwq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxwq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2291,6 +3915,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxwq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2298,6 +3929,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxwq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxwq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2305,6 +3943,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxwq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] @@ -2312,6 +3957,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxwq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50] @@ -2342,6 +3994,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] @@ -2349,6 +4008,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2356,6 +4022,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2363,6 +4036,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2370,6 +4050,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbw: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2377,6 +4064,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] @@ -2384,6 +4078,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] @@ -2414,6 +4115,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] @@ -2421,6 +4129,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2428,6 +4143,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2435,6 +4157,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2442,6 +4171,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2449,6 +4185,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] @@ -2456,6 +4199,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] @@ -2486,6 +4236,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] @@ -2493,6 +4250,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2500,6 +4264,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2507,6 +4278,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2514,6 +4292,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2521,6 +4306,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] @@ -2528,6 +4320,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] @@ -2558,6 +4357,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] @@ -2565,6 +4371,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2572,6 +4385,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2579,6 +4399,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2586,6 +4413,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxdq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2593,6 +4427,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] @@ -2600,6 +4441,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50] @@ -2630,6 +4478,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] @@ -2637,6 +4492,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2644,6 +4506,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2651,6 +4520,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2658,6 +4534,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2665,6 +4548,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] @@ -2672,6 +4562,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] @@ -2702,6 +4599,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxwq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] @@ -2709,6 +4613,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxwq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxwq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2716,6 +4627,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxwq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxwq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2723,6 +4641,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxwq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2730,6 +4655,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxwq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxwq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2737,6 +4669,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxwq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] @@ -2744,6 +4683,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxwq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] @@ -2771,42 +4717,84 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmuldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmuldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmuldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmuldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmuldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmuldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmuldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: ; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmuldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmuldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmuldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -2833,42 +4821,84 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] +; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] +; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67] +; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67] ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -2901,6 +4931,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ptest: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setb %al # sched: [1:0.50] +; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ptest: ; SANDY: # %bb.0: ; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2911,6 +4951,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ptest: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ptest: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2921,6 +4971,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ptest: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ptest: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2931,6 +4991,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ptest: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ptest: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2941,6 +5011,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ptest: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; SKX-SSE-NEXT: setb %al # sched: [1:0.50] +; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: setb %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ptest: ; SKX: # %bb.0: ; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2951,6 +5031,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ptest: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ptest: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2961,6 +5051,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ptest: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ptest: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00] @@ -2994,6 +5094,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] @@ -3001,6 +5108,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] @@ -3008,6 +5122,14 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00] @@ -3015,6 +5137,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] @@ -3022,6 +5151,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: ; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -3029,6 +5165,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -3036,6 +5179,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00] @@ -3066,6 +5216,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundps: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] @@ -3073,6 +5230,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] @@ -3080,6 +5244,14 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00] @@ -3087,6 +5259,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] @@ -3094,6 +5273,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: ; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -3101,6 +5287,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -3108,6 +5301,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00] @@ -3139,6 +5339,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3146,6 +5354,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] @@ -3153,6 +5369,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] @@ -3160,6 +5384,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] @@ -3167,6 +5399,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67] +; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundsd: ; SKX: # %bb.0: ; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] @@ -3174,6 +5414,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3181,6 +5429,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] @@ -3212,6 +5468,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundss: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3219,6 +5483,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] @@ -3226,6 +5498,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] @@ -3233,6 +5513,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] @@ -3240,6 +5528,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67] +; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundss: ; SKX: # %bb.0: ; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] @@ -3247,6 +5543,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3254,6 +5558,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] diff --git a/llvm/test/CodeGen/X86/sse42-schedule.ll b/llvm/test/CodeGen/X86/sse42-schedule.ll index b728bbfd68a..eb4ad7371ef 100644 --- a/llvm/test/CodeGen/X86/sse42-schedule.ll +++ b/llvm/test/CodeGen/X86/sse42-schedule.ll @@ -1,14 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: @@ -25,6 +33,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: crc32_32_8: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: crc32_32_8: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -32,6 +47,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: crc32_32_8: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # %bb.0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -39,6 +61,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: crc32_32_8: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: crc32_32_8: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -46,6 +75,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: crc32_32_8: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: crc32_32_8: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -53,6 +89,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: crc32_32_8: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: crc32_32_8: ; SKX: # %bb.0: ; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -60,6 +103,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: crc32_32_8: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] +; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # %bb.0: ; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00] @@ -67,6 +117,13 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: crc32_32_8: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] +; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: crc32_32_8: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -95,6 +152,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: crc32_32_16: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] +; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: crc32_32_16: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -102,6 +166,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: crc32_32_16: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # %bb.0: ; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -109,6 +180,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: crc32_32_16: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: crc32_32_16: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -116,6 +194,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: crc32_32_16: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: crc32_32_16: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -123,6 +208,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: crc32_32_16: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; SKX-SSE-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: crc32_32_16: ; SKX: # %bb.0: ; SKX-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -130,6 +222,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: crc32_32_16: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: crc32w %si, %edi # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32w (%rdx), %edi # sched: [6:2.00] +; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # %bb.0: ; BTVER2-NEXT: crc32w %si, %edi # sched: [3:2.00] @@ -137,6 +236,13 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: crc32_32_16: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: crc32w %si, %edi # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32w (%rdx), %edi # sched: [10:1.00] +; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: crc32_32_16: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: crc32w %si, %edi # sched: [3:1.00] @@ -165,6 +271,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SLM-NEXT: movl %edi, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: crc32_32_32: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; SANDY-SSE-NEXT: movl %edi, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: crc32_32_32: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -172,6 +285,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: crc32_32_32: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] +; HASWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # %bb.0: ; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -179,6 +299,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: crc32_32_32: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] +; BROADWELL-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: crc32_32_32: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -186,6 +313,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; BROADWELL-NEXT: movl %edi, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: crc32_32_32: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: crc32_32_32: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -193,6 +327,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SKYLAKE-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: crc32_32_32: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; SKX-SSE-NEXT: crc32l (%rdx), %edi # sched: [8:1.00] +; SKX-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: crc32_32_32: ; SKX: # %bb.0: ; SKX-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -200,6 +341,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SKX-NEXT: movl %edi, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: crc32_32_32: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: crc32l %esi, %edi # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32l (%rdx), %edi # sched: [6:2.00] +; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # %bb.0: ; BTVER2-NEXT: crc32l %esi, %edi # sched: [3:2.00] @@ -207,6 +355,13 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; BTVER2-NEXT: movl %edi, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: crc32_32_32: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32l (%rdx), %edi # sched: [10:1.00] +; ZNVER1-SSE-NEXT: movl %edi, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: crc32_32_32: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: crc32l %esi, %edi # sched: [3:1.00] @@ -235,6 +390,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: crc32_64_8: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: crc32_64_8: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -242,6 +404,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: crc32_64_8: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # %bb.0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -249,6 +418,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: crc32_64_8: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: crc32_64_8: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -256,6 +432,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: crc32_64_8: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: crc32_64_8: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -263,6 +446,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: crc32_64_8: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; SKX-SSE-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: crc32_64_8: ; SKX: # %bb.0: ; SKX-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -270,6 +460,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: crc32_64_8: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: crc32b %sil, %edi # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32b (%rdx), %edi # sched: [6:2.00] +; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # %bb.0: ; BTVER2-NEXT: crc32b %sil, %edi # sched: [3:2.00] @@ -277,6 +474,13 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: crc32_64_8: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32b (%rdx), %edi # sched: [10:1.00] +; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: crc32_64_8: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: crc32b %sil, %edi # sched: [3:1.00] @@ -305,6 +509,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SLM-NEXT: movq %rdi, %rax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: crc32_64_64: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; SANDY-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; SANDY-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: crc32_64_64: ; SANDY: # %bb.0: ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -312,6 +523,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: crc32_64_64: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; HASWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; HASWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # %bb.0: ; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -319,6 +537,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: crc32_64_64: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; BROADWELL-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; BROADWELL-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: crc32_64_64: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -326,6 +551,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; BROADWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: crc32_64_64: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: crc32_64_64: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -333,6 +565,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SKYLAKE-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: crc32_64_64: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; SKX-SSE-NEXT: crc32q (%rdx), %rdi # sched: [8:1.00] +; SKX-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: crc32_64_64: ; SKX: # %bb.0: ; SKX-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -340,6 +579,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SKX-NEXT: movq %rdi, %rax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: crc32_64_64: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:2.00] +; BTVER2-SSE-NEXT: crc32q (%rdx), %rdi # sched: [6:2.00] +; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # %bb.0: ; BTVER2-NEXT: crc32q %rsi, %rdi # sched: [3:2.00] @@ -347,6 +593,13 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: crc32_64_64: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; ZNVER1-SSE-NEXT: crc32q (%rdx), %rdi # sched: [10:1.00] +; ZNVER1-SSE-NEXT: movq %rdi, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: crc32_64_64: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] @@ -387,6 +640,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: leal (%rcx,%rsi), %eax # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpestri: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] +; SANDY-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33] +; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] +; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SANDY-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpestri: ; SANDY: # %bb.0: ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] @@ -400,6 +666,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpestri: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; HASWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; HASWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; HASWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] +; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; HASWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpestri: ; HASWELL: # %bb.0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] @@ -413,6 +692,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpestri: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; BROADWELL-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [23:4.00] +; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; BROADWELL-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpestri: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] @@ -426,6 +718,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpestri: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; SKYLAKE-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] +; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SKYLAKE-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpestri: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -439,6 +744,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpestri: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; SKX-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] +; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [24:4.00] +; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SKX-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpestri: ; SKX: # %bb.0: ; SKX-NEXT: movl $7, %eax # sched: [1:0.25] @@ -452,6 +770,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpestri: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [14:5.00] +; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [19:5.00] +; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; BTVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpestri: ; BTVER2: # %bb.0: ; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] @@ -465,6 +796,19 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpestri: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl %ecx, %esi # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; ZNVER1-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpestri: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] @@ -506,6 +850,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [17:17.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpestrm: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] +; SANDY-SSE-NEXT: movl $7, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: movl $7, %edx # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpestrm: ; SANDY: # %bb.0: ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] @@ -516,6 +870,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpestrm: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; HASWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; HASWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; HASWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpestrm: ; HASWELL: # %bb.0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] @@ -526,6 +890,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpestrm: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; BROADWELL-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; BROADWELL-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpestrm: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: movl $7, %eax # sched: [1:0.25] @@ -536,6 +910,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [24:4.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpestrm: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; SKYLAKE-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpestrm: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: movl $7, %eax # sched: [1:0.25] @@ -546,6 +930,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpestrm: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; SKX-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; SKX-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpestrm: ; SKX: # %bb.0: ; SKX-NEXT: movl $7, %eax # sched: [1:0.25] @@ -556,6 +950,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpestrm: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [14:5.00] +; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpestrm: ; BTVER2: # %bb.0: ; BTVER2-NEXT: movl $7, %eax # sched: [1:0.50] @@ -566,6 +970,16 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:5.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpestrm: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: movl $7, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movl $7, %edx # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpestrm: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: movl $7, %eax # sched: [1:0.25] @@ -601,6 +1015,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: leal (%rcx,%rax), %eax # sched: [1:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpistri: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SANDY-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpistri: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] @@ -610,6 +1033,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpistri: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; HASWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; HASWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; HASWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpistri: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] @@ -619,6 +1051,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpistri: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] +; BROADWELL-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; BROADWELL-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpistri: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] @@ -628,6 +1069,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BROADWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpistri: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKYLAKE-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKYLAKE-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SKYLAKE-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpistri: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] @@ -637,6 +1087,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKYLAKE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpistri: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKX-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKX-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; SKX-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpistri: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [10:3.00] @@ -646,6 +1105,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpistri: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [12:2.00] +; BTVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; BTVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpistri: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00] @@ -655,6 +1123,15 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; BTVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpistri: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: movl %ecx, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx +; ZNVER1-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpistri: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [100:?] @@ -684,42 +1161,84 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:13.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpistrm: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpistrm: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] ; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpistrm: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; HASWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpistrm: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpistrm: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; BROADWELL-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpistrm: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] ; BROADWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpistrm: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKYLAKE-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpistrm: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] ; SKYLAKE-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpistrm: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] +; SKX-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpistrm: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] ; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpistrm: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00] +; BTVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpistrm: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00] ; BTVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [13:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpistrm: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpistrm: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [100:?] @@ -745,42 +1264,84 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpgtq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpgtq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpgtq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpgtq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpgtq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpgtq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpgtq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpgtq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKYLAKE-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpgtq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [3:1.00] +; SKX-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpgtq: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpgtq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpgtq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpgtq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50] +; ZNVER1-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpgtq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -807,42 +1368,84 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:10.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pclmulqdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00] +; SANDY-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pclmulqdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00] ; SANDY-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pclmulqdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [11:2.00] +; HASWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [17:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pclmulqdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [11:2.00] ; HASWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [17:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pclmulqdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pclmulqdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pclmulqdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pclmulqdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pclmulqdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [12:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pclmulqdq: ; SKX: # %bb.0: ; SKX-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pclmulqdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pclmulqdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pclmulqdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pclmulqdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [100:?] diff --git a/llvm/test/CodeGen/X86/ssse3-schedule.ll b/llvm/test/CodeGen/X86/ssse3-schedule.ll index 07451357541..ffa7ef12f38 100644 --- a/llvm/test/CodeGen/X86/ssse3-schedule.ll +++ b/llvm/test/CodeGen/X86/ssse3-schedule.ll @@ -1,15 +1,23 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefixes=CHECK,ATOM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pabsb: @@ -34,6 +42,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pabsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pabsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -41,6 +56,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pabsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -48,6 +70,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pabsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pabsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -55,6 +84,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pabsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pabsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -62,6 +98,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pabsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pabsb: ; SKX: # %bb.0: ; SKX-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] @@ -69,6 +112,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pabsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [6:1.00] @@ -76,6 +126,13 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pabsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pabsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpabsb (%rdi), %xmm1 # sched: [8:0.50] @@ -113,6 +170,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pabsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pabsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -120,6 +184,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pabsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -127,6 +198,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pabsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pabsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -134,6 +212,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pabsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pabsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -141,6 +226,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pabsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pabsd: ; SKX: # %bb.0: ; SKX-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] @@ -148,6 +240,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pabsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [6:1.00] @@ -155,6 +254,13 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pabsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pabsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpabsd (%rdi), %xmm1 # sched: [8:0.50] @@ -192,6 +298,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pabsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pabsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -199,6 +312,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pabsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -206,6 +326,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pabsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pabsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -213,6 +340,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pabsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pabsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -220,6 +354,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pabsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; SKX-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pabsw: ; SKX: # %bb.0: ; SKX-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] @@ -227,6 +368,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pabsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [6:1.00] @@ -234,6 +382,13 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pabsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pabsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpabsw (%rdi), %xmm1 # sched: [8:0.50] @@ -272,42 +427,91 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_palignr: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] +; SANDY-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] +; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_palignr: ; SANDY: # %bb.0: ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_palignr: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; HASWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] +; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_palignr: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_palignr: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; BROADWELL-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] +; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_palignr: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] ; BROADWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_palignr: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_palignr: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] ; SKYLAKE-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_palignr: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] +; SKX-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] +; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_palignr: ; SKX: # %bb.0: ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] ; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_palignr: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] +; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] +; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_palignr: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] ; BTVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_palignr: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] +; ZNVER1-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [8:0.50] +; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_palignr: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.25] @@ -338,42 +542,84 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: phaddd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phaddd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phaddd: ; SANDY: # %bb.0: ; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phaddd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phaddd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phaddd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phaddd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phaddd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phaddd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phaddd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phaddd: ; SKX: # %bb.0: ; SKX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phaddd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phaddd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phaddd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -405,42 +651,84 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: phaddsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phaddsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phaddsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phaddsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phaddsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phaddsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phaddsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phaddsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phaddsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phaddsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phaddsw: ; SKX: # %bb.0: ; SKX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phaddsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phaddsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phaddsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -472,42 +760,84 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: phaddw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phaddw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phaddw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phaddw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phaddw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phaddw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phaddw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phaddw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phaddw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phaddw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phaddw: ; SKX: # %bb.0: ; SKX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phaddw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phaddw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phaddw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -539,42 +869,84 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: phsubd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phsubd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phsubd: ; SANDY: # %bb.0: ; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phsubd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phsubd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phsubd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phsubd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phsubd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phsubd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phsubd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phsubd: ; SKX: # %bb.0: ; SKX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phsubd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phsubd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phsubd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -606,42 +978,84 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: phsubsw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phsubsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phsubsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phsubsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phsubsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phsubsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phsubsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phsubsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phsubsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phsubsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phsubsw: ; SKX: # %bb.0: ; SKX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phsubsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phsubsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phsubsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -673,42 +1087,84 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: phsubw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phsubw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] +; SANDY-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phsubw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] ; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phsubw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] +; HASWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phsubw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phsubw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] +; BROADWELL-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [8:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phsubw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BROADWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phsubw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] +; SKYLAKE-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phsubw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKYLAKE-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phsubw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:2.00] +; SKX-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phsubw: ; SKX: # %bb.0: ; SKX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phsubw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phsubw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phsubw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -740,42 +1196,84 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaddubsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaddubsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaddubsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaddubsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaddubsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaddubsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaddubsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaddubsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaddubsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaddubsw: ; SKX: # %bb.0: ; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaddubsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaddubsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaddubsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -808,42 +1306,84 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulhrsw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulhrsw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulhrsw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulhrsw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulhrsw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulhrsw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulhrsw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulhrsw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulhrsw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulhrsw: ; SKX: # %bb.0: ; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulhrsw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulhrsw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulhrsw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -875,42 +1415,84 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pshufb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pshufb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pshufb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pshufb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pshufb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pshufb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pshufb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pshufb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pshufb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pshufb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pshufb: ; SKX: # %bb.0: ; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pshufb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00] +; BTVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pshufb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pshufb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pshufb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -946,42 +1528,84 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: psignb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psignb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psignb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psignb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psignb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psignb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psignb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psignb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psignb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psignb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psignb: ; SKX: # %bb.0: ; SKX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psignb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psignb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psignb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psignb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1017,42 +1641,84 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: psignd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psignd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psignd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psignd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psignd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psignd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psignd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psignd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psignd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psignd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psignd: ; SKX: # %bb.0: ; SKX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psignd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psignd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psignd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psignd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1088,42 +1754,84 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: psignw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_psignw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_psignw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_psignw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_psignw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_psignw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_psignw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_psignw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_psignw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_psignw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_psignw: ; SKX: # %bb.0: ; SKX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_psignw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_psignw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_psignw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_psignw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] |