summary refs log tree commit diff stats
path: root/llvm/test/CodeGen/X86/sse41-schedule.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/X86/sse41-schedule.ll')
-rw-r--r-- llvm/test/CodeGen/X86/sse41-schedule.ll 2338
1 files changed, 2325 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll
index 88cb90fdb43..4bfcebf3cb9 100644
--- a/llvm/test/CodeGen/X86/sse41-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse41-schedule.ll
@@ -1,14 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) {
; GENERIC-LABEL: test_blendpd:
@@ -25,6 +33,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -32,6 +47,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -39,6 +61,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -46,6 +75,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
@@ -53,14 +89,27 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendpd:
; SKX: # %bb.0:
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00]
-; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50]
+; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33]
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00]
+; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -68,6 +117,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50]
@@ -96,6 +152,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendps:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -103,6 +166,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -110,6 +180,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -117,6 +194,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -124,6 +208,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
+; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendps:
; SKX: # %bb.0:
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33]
@@ -131,6 +222,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -138,6 +236,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@@ -170,42 +275,105 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendvpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendvpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendvpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendvpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendvpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendvpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendvpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendvpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendvpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendvpd:
; SKX: # %bb.0:
; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendvpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendvpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendvpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendvpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -237,42 +405,105 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_blendvps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_blendvps:
; SANDY: # %bb.0:
; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_blendvps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_blendvps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_blendvps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_blendvps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_blendvps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_blendvps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_blendvps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_blendvps:
; SKX: # %bb.0:
; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_blendvps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_blendvps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_blendvps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [3:0.33]
+; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [11:0.67]
+; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_blendvps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
@@ -298,42 +529,84 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_dppd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_dppd:
; SANDY: # %bb.0:
; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_dppd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_dppd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_dppd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_dppd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_dppd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_dppd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_dppd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_dppd:
; SKX: # %bb.0:
; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_dppd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
+; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_dppd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00]
; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_dppd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_dppd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -359,42 +632,84 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_dpps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
+; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_dpps:
; SANDY: # %bb.0:
; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_dpps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_dpps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_dpps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00]
+; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_dpps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00]
; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_dpps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50]
+; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_dpps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50]
; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_dpps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33]
+; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_dpps:
; SKX: # %bb.0:
; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33]
; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_dpps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
+; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_dpps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00]
; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_dpps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_dpps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -420,42 +735,84 @@ define i32 @test_extractps(<4 x float> %a0, i32 *%a1) {
; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_extractps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_extractps:
; SANDY: # %bb.0:
; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_extractps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_extractps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_extractps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_extractps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_extractps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_extractps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_extractps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_extractps:
; SKX: # %bb.0:
; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_extractps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_extractps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_extractps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_extractps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00]
@@ -482,42 +839,84 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_insertps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_insertps:
; SANDY: # %bb.0:
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_insertps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_insertps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_insertps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_insertps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_insertps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_insertps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_insertps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
+; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_insertps:
; SKX: # %bb.0:
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_insertps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_insertps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_insertps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_insertps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
@@ -541,36 +940,71 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_movntdqa:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_movntdqa:
; SANDY: # %bb.0:
; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_movntdqa:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_movntdqa:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_movntdqa:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_movntdqa:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_movntdqa:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_movntdqa:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_movntdqa:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_movntdqa:
; SKX: # %bb.0:
; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_movntdqa:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_movntdqa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_movntdqa:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_movntdqa:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50]
@@ -593,42 +1027,84 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_mpsadbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
+; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_mpsadbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_mpsadbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_mpsadbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_mpsadbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_mpsadbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_mpsadbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_mpsadbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_mpsadbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00]
+; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_mpsadbw:
; SKX: # %bb.0:
; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00]
; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_mpsadbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
+; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_mpsadbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00]
; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_mpsadbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:?]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_mpsadbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?]
@@ -655,42 +1131,84 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_packusdw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_packusdw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_packusdw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_packusdw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_packusdw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_packusdw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_packusdw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_packusdw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_packusdw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00]
+; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_packusdw:
; SKX: # %bb.0:
; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_packusdw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_packusdw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_packusdw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_packusdw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -723,42 +1241,105 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pblendvb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pblendvb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pblendvb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00]
+; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pblendvb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pblendvb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pblendvb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pblendvb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pblendvb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pblendvb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
+; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67]
+; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67]
+; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pblendvb:
; SKX: # %bb.0:
; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67]
; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pblendvb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00]
+; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00]
+; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pblendvb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00]
; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pblendvb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pblendvb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
@@ -786,6 +1367,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pblendw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pblendw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -793,6 +1381,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pblendw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pblendw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -800,6 +1395,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pblendw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pblendw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -807,6 +1409,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pblendw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pblendw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -814,6 +1423,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pblendw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
+; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pblendw:
; SKX: # %bb.0:
; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00]
@@ -821,6 +1437,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pblendw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
+; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pblendw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@@ -828,6 +1451,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pblendw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
+; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pblendw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33]
@@ -854,42 +1484,84 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pcmpeqq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pcmpeqq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pcmpeqq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pcmpeqq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pcmpeqq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pcmpeqq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pcmpeqq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pcmpeqq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pcmpeqq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pcmpeqq:
; SKX: # %bb.0:
; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pcmpeqq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pcmpeqq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pcmpeqq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pcmpeqq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -916,42 +1588,84 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrb:
; SKX: # %bb.0:
; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00]
@@ -979,6 +1693,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -986,6 +1707,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -993,6 +1721,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1000,6 +1735,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1007,6 +1749,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrd:
; SKX: # %bb.0:
; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
@@ -1014,6 +1763,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
@@ -1021,6 +1777,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1047,42 +1810,84 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrq:
; SKX: # %bb.0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50]
; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00]
@@ -1107,42 +1912,84 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pextrw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pextrw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pextrw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pextrw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pextrw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pextrw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00]
; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pextrw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pextrw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pextrw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
+; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pextrw:
; SKX: # %bb.0:
; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pextrw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pextrw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50]
; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pextrw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00]
+; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pextrw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00]
@@ -1168,42 +2015,84 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_phminposuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_phminposuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_phminposuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_phminposuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_phminposuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_phminposuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_phminposuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_phminposuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_phminposuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_phminposuw:
; SKX: # %bb.0:
; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50]
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_phminposuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_phminposuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_phminposuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_phminposuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
@@ -1229,42 +2118,84 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrb:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1289,42 +2220,84 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrd:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00]
; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1351,6 +2324,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pinsrq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pinsrq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
@@ -1358,6 +2338,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pinsrq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pinsrq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1365,6 +2352,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pinsrq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pinsrq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1372,6 +2366,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pinsrq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pinsrq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1379,6 +2380,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pinsrq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00]
+; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pinsrq:
; SKX: # %bb.0:
; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00]
@@ -1386,6 +2394,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pinsrq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pinsrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00]
@@ -1393,6 +2408,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pinsrq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pinsrq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50]
@@ -1419,42 +2441,84 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsb:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1480,42 +2544,84 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxsd:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1541,42 +2647,84 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxud:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxud:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxud:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxud:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxud:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxud:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxud:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxud:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxud:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxud:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxud:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxud:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxud:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxud:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1602,42 +2750,84 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmaxuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmaxuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmaxuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmaxuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmaxuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmaxuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmaxuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmaxuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmaxuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmaxuw:
; SKX: # %bb.0:
; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmaxuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmaxuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmaxuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmaxuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1663,42 +2853,84 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsb:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsb:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsb:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsb:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsb:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsb:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsb:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsb:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsb:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsb:
; SKX: # %bb.0:
; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsb:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsb:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsb:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsb:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1724,42 +2956,84 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminsd:
; SKX: # %bb.0:
; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1785,42 +3059,84 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminud:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminud:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminud:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminud:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminud:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminud:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminud:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminud:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminud:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminud:
; SKX: # %bb.0:
; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminud:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminud:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminud:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminud:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1846,42 +3162,84 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pminuw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pminuw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pminuw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pminuw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pminuw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pminuw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pminuw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pminuw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pminuw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pminuw:
; SKX: # %bb.0:
; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pminuw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pminuw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pminuw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pminuw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25]
@@ -1910,6 +3268,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
@@ -1917,6 +3282,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1924,6 +3296,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1931,6 +3310,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1938,6 +3324,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbw:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00]
@@ -1945,6 +3338,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00]
@@ -1952,6 +3352,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50]
@@ -1982,6 +3389,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
@@ -1989,6 +3403,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -1996,6 +3417,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2003,6 +3431,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2010,6 +3445,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2017,6 +3459,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2024,6 +3473,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2054,6 +3510,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxbq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxbq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2061,6 +3524,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxbq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxbq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2068,6 +3538,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxbq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxbq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2075,6 +3552,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxbq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxbq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2082,6 +3566,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxbq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxbq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2089,6 +3580,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxbq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxbq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2096,6 +3594,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxbq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxbq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2126,6 +3631,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2133,6 +3645,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2140,6 +3659,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2147,6 +3673,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2154,6 +3687,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxdq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2161,6 +3701,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2168,6 +3715,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2198,6 +3752,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
@@ -2205,6 +3766,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2212,6 +3780,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2219,6 +3794,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2226,6 +3808,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00]
@@ -2233,6 +3822,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00]
@@ -2240,6 +3836,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50]
@@ -2270,6 +3873,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovsxwq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovsxwq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
@@ -2277,6 +3887,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovsxwq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovsxwq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2284,6 +3901,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovsxwq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovsxwq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2291,6 +3915,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovsxwq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovsxwq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2298,6 +3929,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovsxwq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00]
+; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovsxwq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00]
@@ -2305,6 +3943,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovsxwq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovsxwq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00]
@@ -2312,6 +3957,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovsxwq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovsxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50]
@@ -2342,6 +3994,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbw:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbw:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
@@ -2349,6 +4008,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbw:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbw:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2356,6 +4022,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbw:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbw:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2363,6 +4036,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbw:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbw:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2370,6 +4050,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbw:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbw:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00]
@@ -2377,6 +4064,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbw:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00]
@@ -2384,6 +4078,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbw:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbw:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50]
@@ -2414,6 +4115,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
@@ -2421,6 +4129,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2428,6 +4143,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2435,6 +4157,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2442,6 +4171,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00]
@@ -2449,6 +4185,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00]
@@ -2456,6 +4199,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50]
@@ -2486,6 +4236,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxbq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxbq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
@@ -2493,6 +4250,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxbq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxbq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2500,6 +4264,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxbq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxbq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2507,6 +4278,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxbq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxbq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2514,6 +4292,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxbq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxbq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00]
@@ -2521,6 +4306,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxbq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxbq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00]
@@ -2528,6 +4320,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxbq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxbq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50]
@@ -2558,6 +4357,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxdq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxdq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
@@ -2565,6 +4371,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxdq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxdq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2572,6 +4385,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxdq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxdq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2579,6 +4399,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxdq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxdq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2586,6 +4413,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxdq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxdq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00]
@@ -2593,6 +4427,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxdq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxdq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00]
@@ -2600,6 +4441,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxdq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxdq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50]
@@ -2630,6 +4478,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxwd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxwd:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
@@ -2637,6 +4492,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxwd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxwd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2644,6 +4506,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxwd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxwd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2651,6 +4520,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxwd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxwd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2658,6 +4534,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxwd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxwd:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00]
@@ -2665,6 +4548,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxwd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxwd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00]
@@ -2672,6 +4562,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxwd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxwd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50]
@@ -2702,6 +4599,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmovzxwq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
+; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmovzxwq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
@@ -2709,6 +4613,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmovzxwq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmovzxwq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2716,6 +4627,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmovzxwq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmovzxwq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2723,6 +4641,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmovzxwq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmovzxwq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2730,6 +4655,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmovzxwq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
+; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmovzxwq:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00]
@@ -2737,6 +4669,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmovzxwq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
+; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
+; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmovzxwq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00]
@@ -2744,6 +4683,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmovzxwq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25]
+; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
+; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmovzxwq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50]
@@ -2771,42 +4717,84 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmuldq:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmuldq:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmuldq:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmuldq:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmuldq:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
+; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmuldq:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmuldq:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmuldq:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmuldq:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmuldq:
; SKX: # %bb.0:
; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmuldq:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmuldq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmuldq:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmuldq:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2833,42 +4821,84 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_pmulld:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
+; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_pmulld:
; SANDY: # %bb.0:
; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_pmulld:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_pmulld:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_pmulld:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00]
+; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_pmulld:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00]
; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_pmulld:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00]
+; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_pmulld:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00]
; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_pmulld:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67]
+; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_pmulld:
; SKX: # %bb.0:
; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67]
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_pmulld:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [2:1.00]
+; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_pmulld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_pmulld:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_pmulld:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00]
@@ -2901,6 +4931,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_ptest:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; SANDY-SSE-NEXT: setb %al # sched: [1:0.50]
+; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
+; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_ptest:
; SANDY: # %bb.0:
; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2911,6 +4951,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_ptest:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50]
+; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
+; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_ptest:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2921,6 +4971,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_ptest:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
+; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00]
+; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_ptest:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
@@ -2931,6 +4991,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_ptest:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_ptest:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2941,6 +5011,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_ptest:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; SKX-SSE-NEXT: setb %al # sched: [1:0.50]
+; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00]
+; SKX-SSE-NEXT: setb %cl # sched: [1:0.50]
+; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_ptest:
; SKX: # %bb.0:
; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2951,6 +5031,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_ptest:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50]
+; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50]
+; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_ptest:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00]
@@ -2961,6 +5051,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_ptest:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00]
+; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
+; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_ptest:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00]
@@ -2994,6 +5094,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundpd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundpd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3001,6 +5108,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundpd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundpd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3008,6 +5122,14 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundpd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundpd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3015,6 +5137,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundpd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundpd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3022,6 +5151,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundpd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundpd:
; SKX: # %bb.0:
; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3029,6 +5165,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundpd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundpd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3036,6 +5179,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundpd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3066,6 +5216,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundps:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundps:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
@@ -3073,6 +5230,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundps:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundps:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50]
@@ -3080,6 +5244,14 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundps:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundps:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00]
@@ -3087,6 +5259,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundps:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundps:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00]
@@ -3094,6 +5273,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundps:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundps:
; SKX: # %bb.0:
; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67]
@@ -3101,6 +5287,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundps:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundps:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00]
@@ -3108,6 +5301,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundps:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00]
@@ -3139,6 +5339,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundsd:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundsd:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3146,6 +5354,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundsd:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundsd:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3153,6 +5369,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundsd:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundsd:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3160,6 +5384,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundsd:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundsd:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3167,6 +5399,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundsd:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundsd:
; SKX: # %bb.0:
; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3174,6 +5414,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundsd:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3181,6 +5429,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundsd:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundsd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
@@ -3212,6 +5468,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; SLM-NEXT: retq # sched: [4:1.00]
;
+; SANDY-SSE-LABEL: test_roundss:
+; SANDY-SSE: # %bb.0:
+; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
+; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: retq # sched: [1:1.00]
+;
; SANDY-LABEL: test_roundss:
; SANDY: # %bb.0:
; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3219,6 +5483,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
+; HASWELL-SSE-LABEL: test_roundss:
+; HASWELL-SSE: # %bb.0:
+; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00]
+; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; HASWELL-LABEL: test_roundss:
; HASWELL: # %bb.0:
; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50]
@@ -3226,6 +5498,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
+; BROADWELL-SSE-LABEL: test_roundss:
+; BROADWELL-SSE: # %bb.0:
+; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00]
+; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50]
+; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
+;
; BROADWELL-LABEL: test_roundss:
; BROADWELL: # %bb.0:
; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00]
@@ -3233,6 +5513,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
+; SKYLAKE-SSE-LABEL: test_roundss:
+; SKYLAKE-SSE: # %bb.0:
+; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00]
+; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKYLAKE-LABEL: test_roundss:
; SKYLAKE: # %bb.0:
; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00]
@@ -3240,6 +5528,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
+; SKX-SSE-LABEL: test_roundss:
+; SKX-SSE: # %bb.0:
+; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33]
+; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67]
+; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67]
+; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33]
+; SKX-SSE-NEXT: retq # sched: [7:1.00]
+;
; SKX-LABEL: test_roundss:
; SKX: # %bb.0:
; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67]
@@ -3247,6 +5543,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
+; BTVER2-SSE-LABEL: test_roundss:
+; BTVER2-SSE: # %bb.0:
+; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
+; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00]
+; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
+;
; BTVER2-LABEL: test_roundss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
@@ -3254,6 +5558,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
+; ZNVER1-SSE-LABEL: test_roundss:
+; ZNVER1-SSE: # %bb.0:
+; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25]
+; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00]
+; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00]
+; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
+;
; ZNVER1-LABEL: test_roundss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00]
OpenPOWER on IntegriCloud