summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/sse2-schedule.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/sse2-schedule.ll')
-rw-r--r-- llvm/test/CodeGen/X86/sse2-schedule.ll 5948
1 file changed, 5899 insertions, 49 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll
index 0c30bada475..db6d7a5c198 100644
--- a/llvm/test/CodeGen/X86/sse2-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse2-schedule.ll
@@ -1,15 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_addpd:
@@ -30,42 +38,84 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -96,42 +146,84 @@ define double @test_addsd(double %a0, double %a1, double *%a2) {
; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_addsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_addsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_addsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_addsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_addsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_addsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_addsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_addsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_addsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_addsd:
; SKX: # %bb.0:
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_addsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_addsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_addsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_addsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -165,6 +257,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -172,6 +271,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -179,6 +285,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -186,6 +299,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -193,6 +313,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andpd:
; SKX: # %bb.0:
; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -200,6 +327,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -207,6 +341,13 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -246,6 +387,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_andnotpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_andnotpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -253,6 +401,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_andnotpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_andnotpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -260,6 +415,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_andnotpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_andnotpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -267,6 +429,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_andnotpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_andnotpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -274,6 +443,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_andnotpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_andnotpd:
; SKX: # %bb.0:
; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -281,6 +457,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_andnotpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_andnotpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -288,6 +471,13 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_andnotpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_andnotpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -329,36 +519,71 @@ define void @test_clflush(i8* %p){
; SLM-NEXT: clflush (%rdi) # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_clflush:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_clflush:
; SANDY: # %bb.0:
; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_clflush:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_clflush:
; HASWELL: # %bb.0:
; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_clflush:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_clflush:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_clflush:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_clflush:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_clflush:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_clflush:
; SKX: # %bb.0:
; SKX-NEXT: clflush (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_clflush:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_clflush:
; BTVER2: # %bb.0:
; BTVER2-NEXT: clflush (%rdi) # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_clflush:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: clflush (%rdi) # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_clflush:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50]
@@ -390,6 +615,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmppd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmppd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -397,6 +629,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmppd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmppd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -404,6 +643,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmppd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmppd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -411,6 +657,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmppd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmppd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50]
@@ -418,14 +671,27 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmppd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmppd:
; SKX: # %bb.0:
-; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2q %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
+; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmppd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00]
@@ -433,6 +699,13 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmppd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmppd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -467,42 +740,84 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) {
; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cmpsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cmpsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cmpsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cmpsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cmpsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cmpsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cmpsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cmpsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cmpsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cmpsd:
; SKX: # %bb.0:
; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cmpsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cmpsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cmpsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cmpsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -562,6 +877,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_comisd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_comisd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -576,6 +905,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_comisd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_comisd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -590,6 +933,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_comisd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_comisd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -604,6 +961,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_comisd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_comisd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -618,6 +989,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_comisd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_comisd:
; SKX: # %bb.0:
; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -632,6 +1017,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_comisd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_comisd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -646,6 +1045,20 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_comisd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_comisd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -689,6 +1102,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtdq2pd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtdq2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -696,6 +1116,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtdq2pd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtdq2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
@@ -703,6 +1130,14 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtdq2pd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtdq2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00]
@@ -710,6 +1145,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtdq2pd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtdq2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -717,6 +1159,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtdq2pd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtdq2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -724,6 +1173,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtdq2pd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtdq2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -731,6 +1187,13 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) {
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtdq2pd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtdq2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00]
@@ -769,6 +1232,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtdq2ps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtdq2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -776,6 +1246,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtdq2ps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtdq2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -783,6 +1260,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtdq2ps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtdq2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00]
@@ -790,6 +1274,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtdq2ps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtdq2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50]
@@ -797,6 +1288,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtdq2ps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtdq2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.33]
@@ -804,6 +1302,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtdq2ps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtdq2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00]
@@ -811,6 +1316,13 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) {
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtdq2ps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtdq2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00]
@@ -847,6 +1359,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtpd2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -854,6 +1373,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtpd2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -861,6 +1387,14 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtpd2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -868,6 +1402,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtpd2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -875,6 +1416,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtpd2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -882,6 +1430,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtpd2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -889,6 +1444,13 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtpd2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -926,6 +1488,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtpd2ps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtpd2ps:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -933,6 +1502,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtpd2ps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtpd2ps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -940,6 +1516,14 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtpd2ps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtpd2ps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00]
@@ -947,6 +1531,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtpd2ps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtpd2ps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -954,6 +1545,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtpd2ps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtpd2ps:
; SKX: # %bb.0:
; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00]
@@ -961,6 +1559,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtpd2ps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtpd2ps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00]
@@ -968,6 +1573,13 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtpd2ps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtpd2ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00]
@@ -1005,6 +1617,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtps2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1012,6 +1631,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtps2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1019,6 +1645,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtps2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1026,6 +1659,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtps2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1033,6 +1673,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtps2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1040,6 +1687,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtps2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1047,6 +1701,13 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtps2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1084,6 +1745,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtps2pd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtps2pd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1091,6 +1759,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtps2pd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtps2pd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1098,6 +1773,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtps2pd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtps2pd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00]
@@ -1105,6 +1787,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtps2pd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtps2pd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1112,6 +1801,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtps2pd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtps2pd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00]
@@ -1119,6 +1815,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtps2pd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtps2pd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [8:1.00]
@@ -1126,6 +1829,13 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtps2pd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtps2pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00]
@@ -1163,6 +1873,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1170,6 +1887,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1177,6 +1901,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1184,6 +1915,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1191,6 +1929,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1198,6 +1943,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1205,6 +1957,13 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) {
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1243,6 +2002,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1250,6 +2016,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1257,6 +2030,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1264,6 +2044,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1271,6 +2058,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1278,6 +2072,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1285,6 +2086,13 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) {
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1327,6 +2135,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsd2ss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsd2ss:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1335,6 +2151,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsd2ss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsd2ss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1343,6 +2167,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsd2ss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsd2ss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1351,6 +2183,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsd2ss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsd2ss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1359,6 +2199,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsd2ss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsd2ss:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1367,6 +2215,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsd2ss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsd2ss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -1375,6 +2231,14 @@ define float @test_cvtsd2ss(double %a0, double *%a1) {
; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsd2ss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsd2ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -1411,6 +2275,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2sd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1418,6 +2289,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2sd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1425,6 +2303,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2sd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1432,6 +2317,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1439,6 +2331,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2sd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1446,6 +2345,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2sd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1453,6 +2359,13 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) {
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2sd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1488,6 +2401,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtsi2sdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtsi2sdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1495,6 +2415,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtsi2sdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtsi2sdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1502,6 +2429,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtsi2sdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtsi2sdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00]
@@ -1509,6 +2443,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtsi2sdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtsi2sdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1516,6 +2457,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtsi2sdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtsi2sdq:
; SKX: # %bb.0:
; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1523,6 +2471,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtsi2sdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtsi2sdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [3:1.00]
@@ -1530,6 +2485,13 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) {
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtsi2sdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtsi2sdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1571,6 +2533,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvtss2sd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvtss2sd:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00]
@@ -1579,6 +2549,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvtss2sd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvtss2sd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1587,6 +2565,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvtss2sd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvtss2sd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1595,6 +2581,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvtss2sd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvtss2sd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1603,6 +2597,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvtss2sd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00]
+; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvtss2sd:
; SKX: # %bb.0:
; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
@@ -1611,6 +2613,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvtss2sd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvtss2sd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -1619,6 +2629,14 @@ define double @test_cvtss2sd(float %a0, float *%a1) {
; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvtss2sd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvtss2sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -1656,6 +2674,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttpd2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttpd2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1663,6 +2688,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttpd2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttpd2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1670,6 +2702,14 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttpd2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttpd2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00]
@@ -1677,6 +2717,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttpd2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttpd2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1684,6 +2731,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttpd2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttpd2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00]
@@ -1691,6 +2745,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttpd2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttpd2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00]
@@ -1698,6 +2759,13 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttpd2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttpd2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00]
@@ -1736,6 +2804,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttps2dq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttps2dq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1743,6 +2818,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttps2dq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttps2dq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1750,6 +2832,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttps2dq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttps2dq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00]
@@ -1757,6 +2846,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttps2dq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttps2dq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50]
@@ -1764,6 +2860,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttps2dq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttps2dq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.33]
@@ -1771,6 +2874,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttps2dq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttps2dq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00]
@@ -1778,6 +2888,13 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttps2dq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttps2dq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00]
@@ -1813,6 +2930,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttsd2si:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttsd2si:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00]
@@ -1820,6 +2944,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttsd2si:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttsd2si:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00]
@@ -1827,6 +2958,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttsd2si:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttsd2si:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00]
@@ -1834,6 +2972,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttsd2si:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttsd2si:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1841,6 +2986,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttsd2si:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00]
+; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttsd2si:
; SKX: # %bb.0:
; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00]
@@ -1848,6 +3000,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttsd2si:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttsd2si:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00]
@@ -1855,6 +3014,13 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) {
; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttsd2si:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttsd2si:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00]
@@ -1890,6 +3056,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_cvttsd2siq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_cvttsd2siq:
; SANDY: # %bb.0:
; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00]
@@ -1897,6 +3070,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_cvttsd2siq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_cvttsd2siq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00]
@@ -1904,6 +3084,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_cvttsd2siq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00]
+; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_cvttsd2siq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00]
@@ -1911,6 +3098,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_cvttsd2siq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_cvttsd2siq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1918,6 +3112,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_cvttsd2siq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00]
+; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00]
+; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_cvttsd2siq:
; SKX: # %bb.0:
; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00]
@@ -1925,6 +3126,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_cvttsd2siq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [8:1.00]
+; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_cvttsd2siq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00]
@@ -1932,6 +3140,13 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) {
; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_cvttsd2siq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00]
+; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00]
+; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_cvttsd2siq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00]
@@ -1964,42 +3179,84 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [26:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divpd:
; SKX: # %bb.0:
; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2030,42 +3287,84 @@ define double @test_divsd(double %a0, double %a1, double *%a2) {
; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:34.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_divsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_divsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00]
; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_divsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_divsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:1.00]
; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_divsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_divsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_divsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_divsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_divsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:1.00]
+; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_divsd:
; SKX: # %bb.0:
; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:1.00]
; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_divsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00]
+; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_divsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00]
; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_divsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00]
+; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_divsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00]
@@ -2099,36 +3398,71 @@ define void @test_lfence() {
; SLM-NEXT: lfence # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_lfence:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: lfence # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_lfence:
; SANDY: # %bb.0:
; SANDY-NEXT: lfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_lfence:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: lfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_lfence:
; HASWELL: # %bb.0:
; HASWELL-NEXT: lfence # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_lfence:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_lfence:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: lfence # sched: [2:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_lfence:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_lfence:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: lfence # sched: [2:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_lfence:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: lfence # sched: [2:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_lfence:
; SKX: # %bb.0:
; SKX-NEXT: lfence # sched: [2:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_lfence:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: lfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_lfence:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lfence # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_lfence:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_lfence:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: lfence # sched: [1:0.50]
@@ -2160,36 +3494,71 @@ define void @test_mfence() {
; SLM-NEXT: mfence # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mfence:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mfence # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mfence:
; SANDY: # %bb.0:
; SANDY-NEXT: mfence # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mfence:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mfence # sched: [2:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mfence:
; HASWELL: # %bb.0:
; HASWELL-NEXT: mfence # sched: [2:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mfence:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mfence:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: mfence # sched: [2:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mfence:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mfence:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: mfence # sched: [3:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mfence:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mfence # sched: [3:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mfence:
; SKX: # %bb.0:
; SKX-NEXT: mfence # sched: [3:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mfence:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mfence # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mfence:
; BTVER2: # %bb.0:
; BTVER2-NEXT: mfence # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mfence:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mfence:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: mfence # sched: [1:0.50]
@@ -2219,36 +3588,71 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) {
; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maskmovdqu:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maskmovdqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maskmovdqu:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maskmovdqu:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maskmovdqu:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maskmovdqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maskmovdqu:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maskmovdqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maskmovdqu:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maskmovdqu:
; SKX: # %bb.0:
; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maskmovdqu:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maskmovdqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maskmovdqu:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maskmovdqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:?]
@@ -2277,42 +3681,84 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxpd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2344,42 +3790,84 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_maxsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_maxsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_maxsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_maxsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_maxsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_maxsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_maxsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_maxsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_maxsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_maxsd:
; SKX: # %bb.0:
; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_maxsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_maxsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_maxsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_maxsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2411,42 +3899,84 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minpd:
; SKX: # %bb.0:
; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2478,42 +4008,84 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_minsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_minsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_minsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_minsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_minsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_minsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_minsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_minsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_minsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_minsd:
; SKX: # %bb.0:
; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_minsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_minsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_minsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_minsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -2548,6 +4120,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movapd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movapd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2555,6 +4134,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movapd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movapd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2562,6 +4148,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movapd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movapd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50]
@@ -2569,6 +4162,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movapd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movapd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2576,6 +4176,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movapd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movapd:
; SKX: # %bb.0:
; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50]
@@ -2583,6 +4190,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movapd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movapd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00]
@@ -2590,6 +4204,13 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) {
; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movapd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movapd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50]
@@ -2624,6 +4245,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2631,6 +4259,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2638,6 +4273,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50]
@@ -2645,6 +4287,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2652,6 +4301,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movdqa:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50]
@@ -2659,6 +4315,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00]
@@ -2666,6 +4329,13 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) {
; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -2700,6 +4370,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movdqu:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movdqu:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2707,6 +4384,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movdqu:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movdqu:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2714,6 +4398,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movdqu:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movdqu:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50]
@@ -2721,6 +4412,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movdqu:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movdqu:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2728,6 +4426,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movdqu:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movdqu:
; SKX: # %bb.0:
; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50]
@@ -2735,6 +4440,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movdqu:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movdqu:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00]
@@ -2742,6 +4454,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) {
; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movdqu:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movdqu:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50]
@@ -2785,6 +4504,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2795,6 +4524,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2805,6 +4544,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2815,6 +4564,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
@@ -2825,16 +4584,36 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movd:
; SKX: # %bb.0:
-; SKX-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50]
-; SKX-NEXT: vmovd %edi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50]
+; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00]
-; SKX-NEXT: vmovd %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@@ -2845,6 +4624,16 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50]
@@ -2896,6 +4685,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movd_64:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movd_64:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2906,6 +4705,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movd_64:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movd_64:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2916,6 +4725,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movd_64:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movd_64:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2926,6 +4745,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movd_64:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movd_64:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
@@ -2936,16 +4765,36 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movd_64:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00]
+; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movd_64:
; SKX: # %bb.0:
-; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
-; SKX-NEXT: vmovq %rdi, %xmm2 # sched: [1:1.00]
-; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm2 # sched: [1:0.33]
-; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00]
+; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50]
+; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00]
-; SKX-NEXT: vmovq %xmm2, (%rsi) # sched: [1:1.00]
+; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movd_64:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movd_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00]
@@ -2956,6 +4805,16 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) {
; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movd_64:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movd_64:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50]
@@ -2998,6 +4857,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movhpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movhpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
@@ -3005,6 +4871,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movhpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movhpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3012,6 +4885,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movhpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movhpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3019,6 +4899,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movhpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movhpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3026,6 +4913,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movhpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movhpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3033,6 +4927,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movhpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@@ -3040,6 +4941,13 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movhpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movhpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
@@ -3077,6 +4985,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movlpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movlpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
@@ -3084,6 +4999,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movlpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movlpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3091,6 +5013,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movlpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movlpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3098,6 +5027,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movlpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movlpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3105,6 +5041,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movlpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movlpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3112,6 +5055,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movlpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movlpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@@ -3119,6 +5069,13 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movlpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movlpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
@@ -3152,36 +5109,71 @@ define i32 @test_movmskpd(<2 x double> %a0) {
; SLM-NEXT: movmskpd %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movmskpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movmskpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movmskpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movmskpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movmskpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movmskpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movmskpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movmskpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movmskpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movmskpd:
; SKX: # %bb.0:
; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movmskpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movmskpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movmskpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movmskpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00]
@@ -3212,42 +5204,84 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) {
; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntdqa:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3277,42 +5311,84 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntpd:
; SKX: # %bb.0:
; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3345,6 +5421,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movq_mem:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movq_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50]
@@ -3352,6 +5435,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movq_mem:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movq_mem:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3359,6 +5449,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movq_mem:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movq_mem:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3366,6 +5463,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movq_mem:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movq_mem:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3373,6 +5477,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movq_mem:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movq_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50]
@@ -3380,6 +5491,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movq_mem:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movq_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00]
@@ -3387,6 +5505,13 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) {
; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movq_mem:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movq_mem:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50]
@@ -3422,42 +5547,84 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) {
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movq_reg:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movq_reg:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movq_reg:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movq_reg:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movq_reg:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movq_reg:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movq_reg:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movq_reg:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movq_reg:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movq_reg:
; SKX: # %bb.0:
; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33]
; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movq_reg:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movq_reg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50]
; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movq_reg:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movq_reg:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25]
@@ -3490,6 +5657,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movsd_mem:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
+; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movsd_mem:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50]
@@ -3497,6 +5671,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movsd_mem:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movsd_mem:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3504,6 +5685,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movsd_mem:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movsd_mem:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3511,6 +5699,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movsd_mem:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movsd_mem:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3518,6 +5713,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movsd_mem:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
+; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movsd_mem:
; SKX: # %bb.0:
; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50]
@@ -3525,6 +5727,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movsd_mem:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
+; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movsd_mem:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00]
@@ -3532,6 +5741,13 @@ define void @test_movsd_mem(double* %a0, double* %a1) {
; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movsd_mem:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movsd_mem:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50]
@@ -3567,36 +5783,78 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) {
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movsd_reg:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movsd_reg:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movsd_reg:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movsd_reg:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movsd_reg:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movsd_reg:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movsd_reg:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movsd_reg:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movsd_reg:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movsd_reg:
; SKX: # %bb.0:
; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movsd_reg:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movsd_reg:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movsd_reg:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movsd_reg:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50]
@@ -3627,6 +5885,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movupd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movupd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3634,6 +5899,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movupd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movupd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3641,6 +5913,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movupd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movupd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50]
@@ -3648,6 +5927,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movupd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movupd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3655,6 +5941,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movupd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movupd:
; SKX: # %bb.0:
; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50]
@@ -3662,6 +5955,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movupd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movupd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00]
@@ -3669,6 +5969,13 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) {
; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movupd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movupd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50]
@@ -3700,42 +6007,84 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulpd:
; SKX: # %bb.0:
; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3766,42 +6115,84 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) {
; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mulsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mulsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mulsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50]
+; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mulsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mulsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mulsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mulsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mulsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mulsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mulsd:
; SKX: # %bb.0:
; SKX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mulsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00]
+; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mulsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mulsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50]
+; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mulsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50]
@@ -3835,6 +6226,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_orpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_orpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3842,6 +6240,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_orpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_orpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3849,6 +6254,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_orpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_orpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -3856,6 +6268,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_orpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_orpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3863,6 +6282,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_orpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_orpd:
; SKX: # %bb.0:
; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -3870,6 +6296,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_orpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_orpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -3877,6 +6310,13 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_orpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_orpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3917,42 +6357,84 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packssdw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packssdw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packssdw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packssdw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packssdw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packssdw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packssdw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packssdw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packssdw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packssdw:
; SKX: # %bb.0:
; SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packssdw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packssdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packssdw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packssdw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -3989,42 +6471,84 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packsswb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packsswb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packsswb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packsswb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packsswb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packsswb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packsswb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packsswb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packsswb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packsswb:
; SKX: # %bb.0:
; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packsswb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packsswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packsswb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packsswb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4061,42 +6585,84 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packuswb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packuswb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packuswb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packuswb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packuswb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packuswb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packuswb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packuswb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packuswb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packuswb:
; SKX: # %bb.0:
; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packuswb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packuswb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packuswb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packuswb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4133,42 +6699,84 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4203,42 +6811,84 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddd:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4269,42 +6919,84 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddq:
; SKX: # %bb.0:
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4339,42 +7031,84 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddsb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4410,42 +7144,84 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddsw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4481,42 +7257,84 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddusb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddusb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddusb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddusb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddusb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddusb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddusb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddusb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddusb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddusb:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddusb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddusb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddusb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4552,42 +7370,84 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddusw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddusw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddusw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddusw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddusw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddusw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddusw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddusw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddusw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddusw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddusw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddusw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddusw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4623,42 +7483,84 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_paddw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_paddw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_paddw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_paddw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_paddw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_paddw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_paddw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_paddw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_paddw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_paddw:
; SKX: # %bb.0:
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_paddw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_paddw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_paddw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_paddw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4692,6 +7594,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pand:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pand:
; SANDY: # %bb.0:
; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4699,6 +7608,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pand:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pand:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4706,6 +7622,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pand:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pand:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4713,6 +7636,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pand:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pand:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4720,6 +7650,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pand:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pand:
; SKX: # %bb.0:
; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4727,6 +7664,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pand:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pand:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4734,6 +7678,13 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pand:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pand:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4775,6 +7726,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pandn:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pandn:
; SANDY: # %bb.0:
; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4782,6 +7742,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pandn:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pandn:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4789,6 +7758,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pandn:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pandn:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4796,6 +7774,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pandn:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pandn:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4803,6 +7790,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pandn:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pandn:
; SKX: # %bb.0:
; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -4810,6 +7806,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pandn:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pandn:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -4817,6 +7822,15 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pandn:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pandn:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4855,42 +7869,84 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pavgb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pavgb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pavgb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pavgb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pavgb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pavgb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pavgb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pavgb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pavgb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pavgb:
; SKX: # %bb.0:
; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pavgb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pavgb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pavgb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pavgb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -4935,42 +7991,84 @@ define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pavgw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pavgw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pavgw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pavgw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pavgw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pavgw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pavgw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pavgw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pavgw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pavgw:
; SKX: # %bb.0:
; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pavgw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pavgw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pavgw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pavgw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5016,6 +8114,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5023,6 +8128,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5030,6 +8142,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5037,6 +8156,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5044,14 +8170,27 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5059,6 +8198,13 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5097,6 +8243,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5104,6 +8257,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5111,6 +8271,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5118,6 +8285,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5125,14 +8299,27 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5140,6 +8327,13 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5178,6 +8372,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5185,6 +8386,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5192,6 +8400,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5199,6 +8414,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5206,14 +8428,27 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5221,6 +8456,13 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5260,6 +8502,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5267,6 +8517,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5274,6 +8532,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5281,6 +8547,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5288,14 +8562,29 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtb:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5303,6 +8592,14 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5342,6 +8639,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5349,6 +8654,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5356,6 +8669,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5363,6 +8684,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5370,14 +8699,29 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtd:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korw %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2d %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5385,6 +8729,14 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5424,6 +8776,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpgtw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpgtw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5431,6 +8791,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpgtw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpgtw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5438,6 +8806,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpgtw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpgtw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5445,6 +8821,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpgtw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpgtw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5452,14 +8836,29 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpgtw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpgtw:
; SKX: # %bb.0:
-; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # sched: [3:1.00]
-; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %k1 # sched: [9:1.00]
-; SKX-NEXT: korb %k1, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: vpmovm2w %k0, %xmm0 # sched: [1:0.25]
+; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
+; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
+; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpgtw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpgtw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50]
@@ -5467,6 +8866,14 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpgtw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpgtw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25]
@@ -5500,42 +8907,84 @@ define i16 @test_pextrw(<8 x i16> %a0) {
; SLM-NEXT: # kill: def $ax killed $ax killed $eax
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: # kill: def $ax killed $ax killed $eax
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrw:
; SKX: # %bb.0:
; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00]
@@ -5568,42 +9017,84 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) {
; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrw:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5634,42 +9125,84 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaddwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaddwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaddwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaddwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaddwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaddwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaddwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaddwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaddwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaddwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaddwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaddwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaddwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaddwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -5706,42 +9239,84 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5777,42 +9352,84 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxub:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxub:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxub:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxub:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxub:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxub:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxub:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxub:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxub:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxub:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxub:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxub:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxub:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5848,42 +9465,84 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsw:
; SKX: # %bb.0:
; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5919,42 +9578,84 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminub:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminub:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminub:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminub:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminub:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminub:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminub:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminub:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminub:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminub:
; SKX: # %bb.0:
; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminub:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminub:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminub:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -5985,36 +9686,71 @@ define i32 @test_pmovmskb(<16 x i8> %a0) {
; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovmskb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovmskb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovmskb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovmskb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovmskb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovmskb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovmskb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovmskb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovmskb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovmskb:
; SKX: # %bb.0:
; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovmskb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovmskb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovmskb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovmskb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00]
@@ -6043,42 +9779,84 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulhuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulhuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulhuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulhuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulhuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulhuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulhuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulhuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulhuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulhuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulhuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulhuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulhuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulhuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6110,42 +9888,84 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulhw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulhw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulhw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulhw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulhw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulhw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulhw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulhw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulhw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulhw:
; SKX: # %bb.0:
; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulhw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulhw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulhw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6177,42 +9997,84 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmullw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmullw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmullw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmullw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmullw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmullw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmullw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmullw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmullw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmullw:
; SKX: # %bb.0:
; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmullw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmullw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmullw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmullw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6243,42 +10105,84 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmuludq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmuludq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmuludq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmuludq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmuludq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmuludq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmuludq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmuludq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmuludq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmuludq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmuludq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmuludq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmuludq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmuludq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -6314,6 +10218,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_por:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_por:
; SANDY: # %bb.0:
; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6321,6 +10232,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_por:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_por:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6328,6 +10246,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_por:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_por:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6335,6 +10260,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_por:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_por:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6342,6 +10274,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_por:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_por:
; SKX: # %bb.0:
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -6349,6 +10288,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_por:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_por:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6356,6 +10302,13 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_por:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_por:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6392,42 +10345,84 @@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psadbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psadbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psadbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psadbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psadbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psadbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psadbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psadbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psadbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psadbw:
; SKX: # %bb.0:
; SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psadbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psadbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psadbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -6465,6 +10460,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshufd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshufd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50]
@@ -6472,6 +10474,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshufd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshufd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6479,6 +10488,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshufd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshufd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6486,6 +10502,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshufd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshufd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6493,6 +10516,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshufd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00]
+; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshufd:
; SKX: # %bb.0:
; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00]
@@ -6500,6 +10530,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshufd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshufd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00]
@@ -6507,6 +10544,13 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshufd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshufd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50]
@@ -6544,6 +10588,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshufhw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshufhw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
@@ -6551,6 +10602,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshufhw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshufhw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6558,6 +10616,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshufhw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshufhw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6565,6 +10630,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshufhw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshufhw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6572,6 +10644,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshufhw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
+; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshufhw:
; SKX: # %bb.0:
; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00]
@@ -6579,6 +10658,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshufhw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshufhw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00]
@@ -6586,6 +10672,13 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) {
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshufhw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshufhw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50]
@@ -6623,6 +10716,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pshuflw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pshuflw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
@@ -6630,6 +10730,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pshuflw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pshuflw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6637,6 +10744,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pshuflw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pshuflw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6644,6 +10758,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pshuflw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pshuflw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6651,6 +10772,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pshuflw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
+; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pshuflw:
; SKX: # %bb.0:
; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00]
@@ -6658,6 +10786,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pshuflw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pshuflw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00]
@@ -6665,6 +10800,13 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) {
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pshuflw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pshuflw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50]
@@ -6700,6 +10842,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pslld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pslld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6707,6 +10856,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pslld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pslld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6714,6 +10870,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pslld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pslld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6721,6 +10884,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pslld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pslld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6728,6 +10898,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pslld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pslld:
; SKX: # %bb.0:
; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6735,6 +10912,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pslld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pslld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6742,6 +10926,13 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pslld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pslld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6779,36 +10970,71 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) {
; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pslldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pslldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pslldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pslldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pslldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pslldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pslldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pslldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pslldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pslldq:
; SKX: # %bb.0:
; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pslldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pslldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pslldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pslldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00]
@@ -6839,6 +11065,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psllq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psllq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6846,6 +11079,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psllq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psllq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6853,6 +11093,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psllq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psllq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6860,6 +11107,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psllq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psllq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6867,6 +11121,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psllq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psllq:
; SKX: # %bb.0:
; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6874,6 +11135,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psllq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psllq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6881,6 +11149,13 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psllq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psllq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6918,6 +11193,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psllw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psllw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6925,6 +11207,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psllw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psllw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6932,6 +11221,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psllw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psllw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6939,6 +11235,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psllw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psllw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6946,6 +11249,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psllw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psllw:
; SKX: # %bb.0:
; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -6953,6 +11263,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psllw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psllw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -6960,6 +11277,13 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psllw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psllw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -6997,6 +11321,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrad:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrad:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7004,6 +11335,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrad:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrad:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7011,6 +11349,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrad:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrad:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7018,6 +11363,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrad:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrad:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7025,6 +11377,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrad:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrad:
; SKX: # %bb.0:
; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7032,6 +11391,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrad:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7039,6 +11405,13 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrad:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrad:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7076,6 +11449,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psraw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psraw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7083,6 +11463,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psraw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psraw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7090,6 +11477,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psraw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psraw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7097,6 +11491,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psraw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psraw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7104,6 +11505,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psraw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psraw:
; SKX: # %bb.0:
; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7111,6 +11519,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psraw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psraw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7118,6 +11533,13 @@ define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psraw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psraw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7155,6 +11577,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7162,6 +11591,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7169,6 +11605,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7176,6 +11619,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7183,6 +11633,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrld:
; SKX: # %bb.0:
; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7190,6 +11647,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7197,6 +11661,13 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7234,36 +11705,71 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) {
; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrldq:
; SKX: # %bb.0:
; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00]
@@ -7294,6 +11800,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrlq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrlq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7301,6 +11814,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrlq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrlq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7308,6 +11828,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrlq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrlq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7315,6 +11842,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrlq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrlq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7322,6 +11856,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrlq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrlq:
; SKX: # %bb.0:
; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7329,6 +11870,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrlq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrlq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7336,6 +11884,13 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrlq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrlq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7373,6 +11928,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psrlw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psrlw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7380,6 +11942,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psrlw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psrlw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7387,6 +11956,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psrlw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psrlw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7394,6 +11970,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psrlw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psrlw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7401,6 +11984,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psrlw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psrlw:
; SKX: # %bb.0:
; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
@@ -7408,6 +11998,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psrlw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psrlw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -7415,6 +12012,13 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psrlw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psrlw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -7453,42 +12057,84 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7523,42 +12169,84 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubd:
; SKX: # %bb.0:
; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7589,42 +12277,84 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubq:
; SKX: # %bb.0:
; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7659,42 +12389,84 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubsb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7730,42 +12502,84 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubsw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubsw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubsw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubsw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubsw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubsw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubsw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubsw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubsw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubsw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubsw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubsw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubsw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7801,42 +12615,84 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubusb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubusb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubusb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubusb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubusb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubusb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubusb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubusb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubusb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubusb:
; SKX: # %bb.0:
; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubusb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubusb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubusb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubusb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7872,42 +12728,84 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubusw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubusw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubusw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubusw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubusw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubusw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubusw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubusw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubusw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubusw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubusw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubusw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubusw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubusw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -7943,42 +12841,84 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_psubw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_psubw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_psubw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_psubw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_psubw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_psubw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_psubw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_psubw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_psubw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_psubw:
; SKX: # %bb.0:
; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_psubw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_psubw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_psubw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_psubw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8013,42 +12953,84 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhbw:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00]
; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50]
; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25]
@@ -8084,6 +13066,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8091,6 +13080,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8098,6 +13094,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8105,6 +13108,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8112,6 +13122,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
@@ -8119,6 +13136,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@@ -8126,6 +13150,13 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8161,6 +13192,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhqdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhqdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8168,6 +13206,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhqdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8175,6 +13220,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhqdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8182,6 +13234,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhqdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8189,6 +13248,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhqdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8196,6 +13262,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhqdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhqdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8203,6 +13276,13 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhqdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8239,42 +13319,84 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckhwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckhwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckhwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckhwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckhwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckhwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckhwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckhwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckhwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckhwd:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckhwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckhwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckhwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckhwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8309,42 +13431,84 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklbw:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00]
; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50]
; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25]
@@ -8380,6 +13544,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpckldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpckldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8387,6 +13558,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpckldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpckldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8394,6 +13572,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpckldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpckldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8401,6 +13586,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpckldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpckldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8408,6 +13600,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpckldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpckldq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
@@ -8415,6 +13614,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpckldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpckldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@@ -8422,6 +13628,13 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpckldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpckldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25]
@@ -8457,6 +13670,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklqdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklqdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8464,6 +13684,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklqdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklqdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8471,6 +13698,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklqdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklqdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8478,6 +13712,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklqdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklqdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8485,6 +13726,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklqdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklqdq:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -8492,6 +13740,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklqdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklqdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -8499,6 +13754,13 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklqdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklqdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25]
@@ -8535,42 +13797,84 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_punpcklwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_punpcklwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_punpcklwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_punpcklwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_punpcklwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_punpcklwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_punpcklwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_punpcklwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_punpcklwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
+; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_punpcklwd:
; SKX: # %bb.0:
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_punpcklwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_punpcklwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_punpcklwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
+; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_punpcklwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25]
@@ -8604,6 +13908,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pxor:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pxor:
; SANDY: # %bb.0:
; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8611,6 +13922,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pxor:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pxor:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8618,6 +13936,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pxor:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pxor:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8625,6 +13950,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pxor:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pxor:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8632,6 +13964,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pxor:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pxor:
; SKX: # %bb.0:
; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -8639,6 +13978,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pxor:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pxor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -8646,6 +13992,13 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pxor:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pxor:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -8681,6 +14034,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_shufpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_shufpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8688,6 +14048,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_shufpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_shufpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8695,6 +14062,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_shufpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_shufpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8702,6 +14076,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_shufpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_shufpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8709,6 +14090,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_shufpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_shufpd:
; SKX: # %bb.0:
; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00]
@@ -8716,6 +14104,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_shufpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_shufpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8723,6 +14118,13 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_shufpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_shufpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50]
@@ -8759,6 +14161,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00]
@@ -8766,6 +14175,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8773,6 +14189,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [25:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00]
@@ -8780,6 +14203,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8787,6 +14217,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtpd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:1.00]
@@ -8794,6 +14231,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00]
+; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [26:21.00]
@@ -8801,6 +14245,13 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:1.00]
+; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:1.00]
@@ -8842,6 +14293,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_sqrtsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00]
+; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_sqrtsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8850,6 +14309,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_sqrtsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_sqrtsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8858,6 +14325,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_sqrtsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_sqrtsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00]
@@ -8866,6 +14341,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_sqrtsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_sqrtsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8874,6 +14357,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_sqrtsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:1.00]
+; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50]
+; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_sqrtsd:
; SKX: # %bb.0:
; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:1.00]
@@ -8882,6 +14373,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_sqrtsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [26:21.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_sqrtsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00]
@@ -8890,6 +14389,14 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_sqrtsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_sqrtsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50]
@@ -8924,42 +14431,84 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subpd:
; SKX: # %bb.0:
; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -8990,42 +14539,84 @@ define double @test_subsd(double %a0, double %a1, double *%a2) {
; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_subsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_subsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_subsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_subsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_subsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_subsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_subsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_subsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_subsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_subsd:
; SKX: # %bb.0:
; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_subsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_subsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_subsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_subsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
@@ -9080,6 +14671,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ucomisd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ucomisd:
; SANDY: # %bb.0:
; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9094,6 +14699,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ucomisd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ucomisd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9108,6 +14727,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ucomisd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ucomisd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9122,6 +14755,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ucomisd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ucomisd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9136,6 +14783,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ucomisd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: setnp %al # sched: [1:0.50]
+; SKX-SSE-NEXT: sete %dl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ucomisd:
; SKX: # %bb.0:
; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00]
@@ -9150,6 +14811,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ucomisd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ucomisd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9164,6 +14839,20 @@ define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2)
; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ucomisd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00]
+; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ucomisd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00]
@@ -9207,6 +14896,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpckhpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpckhpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9214,6 +14910,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpckhpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpckhpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9221,6 +14924,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpckhpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpckhpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9228,6 +14938,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpckhpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpckhpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9235,6 +14952,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpckhpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
+; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpckhpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00]
@@ -9242,6 +14966,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpckhpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpckhpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9249,6 +14980,13 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpckhpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpckhpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50]
@@ -9290,6 +15028,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_unpcklpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_unpcklpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9297,6 +15044,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_unpcklpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_unpcklpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9304,6 +15060,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_unpcklpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_unpcklpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9311,6 +15076,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_unpcklpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_unpcklpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9318,6 +15092,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_unpcklpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33]
+; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.33]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_unpcklpd:
; SKX: # %bb.0:
; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
@@ -9325,6 +15108,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_unpcklpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_unpcklpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9332,6 +15124,15 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_unpcklpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_unpcklpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@@ -9367,6 +15168,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_xorpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_xorpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9374,6 +15182,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_xorpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_xorpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9381,6 +15196,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_xorpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_xorpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -9388,6 +15210,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_xorpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_xorpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9395,6 +15224,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_xorpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_xorpd:
; SKX: # %bb.0:
; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
@@ -9402,6 +15238,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_xorpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_xorpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -9409,6 +15252,13 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_xorpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_xorpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
OpenPOWER on IntegriCloud