diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/sse41-schedule.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/sse41-schedule.ll | 2338 |
1 files changed, 2325 insertions, 13 deletions
diff --git a/llvm/test/CodeGen/X86/sse41-schedule.ll b/llvm/test/CodeGen/X86/sse41-schedule.ll index 88cb90fdb43..4bfcebf3cb9 100644 --- a/llvm/test/CodeGen/X86/sse41-schedule.ll +++ b/llvm/test/CodeGen/X86/sse41-schedule.ll @@ -1,14 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SLM +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: @@ -25,6 +33,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SLM-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -32,6 +47,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -39,6 +61,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -46,6 +75,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BROADWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] @@ -53,14 +89,27 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendpd: ; SKX: # %bb.0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vmovapd (%rdi), %xmm2 # sched: [6:0.50] +; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm2[1] sched: [1:1.00] +; SKX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -68,6 +117,13 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BTVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] @@ -96,6 +152,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; SANDY-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendps: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -103,6 +166,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; HASWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -110,6 +180,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; BROADWELL-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -117,6 +194,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -124,6 +208,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] +; SKX-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendps: ; SKX: # %bb.0: ; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] @@ -131,6 +222,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [6:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -138,6 +236,13 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [8:0.50] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] @@ -170,42 +275,105 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SLM-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendvpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendvpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendvpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendvpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendvpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendvpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendvpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendvpd: ; SKX: # %bb.0: ; SKX-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendvpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendvpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [3:0.33] +; ZNVER1-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [11:0.67] +; ZNVER1-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendvpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -237,42 +405,105 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SLM-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_blendvps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvps: ; SANDY: # %bb.0: ; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_blendvps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_blendvps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_blendvps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_blendvps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_blendvps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_blendvps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_blendvps: ; SKX: # %bb.0: ; SKX-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_blendvps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_blendvps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [3:0.33] +; ZNVER1-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [11:0.67] +; ZNVER1-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_blendvps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:0.50] @@ -298,42 +529,84 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SLM-NEXT: dppd $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_dppd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_dppd: ; SANDY: # %bb.0: ; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_dppd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; HASWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_dppd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_dppd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; BROADWELL-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_dppd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; BROADWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_dppd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_dppd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SKYLAKE-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_dppd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_dppd: ; SKX: # %bb.0: ; SKX-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] ; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_dppd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00] +; BTVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [14:3.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_dppd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:3.00] ; BTVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [14:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_dppd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_dppd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -359,42 +632,84 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; SLM-NEXT: dpps $7, (%rdi), %xmm0 # sched: [6:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_dpps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] +; SANDY-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_dpps: ; SANDY: # %bb.0: ; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_dpps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] +; HASWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [20:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_dpps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] ; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [20:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_dpps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [14:2.00] +; BROADWELL-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_dpps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] ; BROADWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_dpps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.50] +; SKYLAKE-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_dpps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.50] ; SKYLAKE-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_dpps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [13:1.33] +; SKX-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [19:1.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_dpps: ; SKX: # %bb.0: ; SKX-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [13:1.33] ; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_dpps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00] +; BTVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [16:3.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_dpps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [11:3.00] ; BTVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [16:3.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_dpps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_dpps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -420,42 +735,84 @@ define i32 @test_extractps(<4 x float> %a0, i32 *%a1) { ; SLM-NEXT: extractps $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_extractps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_extractps: ; SANDY: # %bb.0: ; SANDY-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_extractps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_extractps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_extractps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_extractps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vextractps $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_extractps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_extractps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_extractps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_extractps: ; SKX: # %bb.0: ; SKX-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_extractps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_extractps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_extractps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:2.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_extractps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vextractps $3, %xmm0, %eax # sched: [2:2.00] @@ -482,42 +839,84 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SLM-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_insertps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SANDY-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_insertps: ; SANDY: # %bb.0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_insertps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; HASWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_insertps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_insertps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; BROADWELL-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_insertps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; BROADWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_insertps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_insertps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SKYLAKE-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_insertps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; SKX-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_insertps: ; SKX: # %bb.0: ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_insertps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] +; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_insertps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] ; BTVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_insertps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] +; ZNVER1-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_insertps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50] @@ -541,36 +940,71 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; SLM-NEXT: movntdqa (%rdi), %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_movntdqa: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntdqa: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_movntdqa: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_movntdqa: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_movntdqa: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_movntdqa: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_movntdqa: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_movntdqa: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_movntdqa: ; SKX: # %bb.0: ; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_movntdqa: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [5:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_movntdqa: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_movntdqa: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [8:0.50] @@ -593,42 +1027,84 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_mpsadbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00] +; SANDY-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_mpsadbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] +; HASWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] ; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_mpsadbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [12:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_mpsadbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [12:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_mpsadbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] +; SKYLAKE-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_mpsadbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; SKYLAKE-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_mpsadbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [4:2.00] +; SKX-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [10:2.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_mpsadbw: ; SKX: # %bb.0: ; SKX-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [4:2.00] ; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_mpsadbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00] +; BTVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [8:2.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_mpsadbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [3:2.00] ; BTVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_mpsadbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [100:?] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_mpsadbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [100:?] @@ -655,42 +1131,84 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: packusdw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_packusdw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_packusdw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_packusdw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_packusdw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_packusdw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; BROADWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_packusdw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_packusdw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKYLAKE-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_packusdw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:1.00] +; SKX-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_packusdw: ; SKX: # %bb.0: ; SKX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] ; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_packusdw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_packusdw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_packusdw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -723,42 +1241,105 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SLM-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pblendvb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SANDY-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00] +; SANDY-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; SANDY-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pblendvb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pblendvb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; HASWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; HASWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pblendvb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BROADWELL-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pblendvb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pblendvb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKYLAKE-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKYLAKE-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pblendvb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKYLAKE-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pblendvb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; SKX-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:0.67] +; SKX-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:0.67] +; SKX-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pblendvb: ; SKX: # %bb.0: ; SKX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:0.67] ; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pblendvb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; BTVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [7:2.00] +; BTVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] ; BTVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pblendvb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pblendvb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -786,6 +1367,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pblendw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; SANDY-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pblendw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] @@ -793,6 +1381,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pblendw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; HASWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -800,6 +1395,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pblendw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; BROADWELL-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pblendw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -807,6 +1409,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pblendw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pblendw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -814,6 +1423,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pblendw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] +; SKX-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pblendw: ; SKX: # %bb.0: ; SKX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] @@ -821,6 +1437,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pblendw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] @@ -828,6 +1451,13 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pblendw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] +; ZNVER1-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pblendw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.33] @@ -854,42 +1484,84 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pcmpeqq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pcmpeqq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pcmpeqq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pcmpeqq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pcmpeqq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pcmpeqq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pcmpeqq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pcmpeqq: ; SKX: # %bb.0: ; SKX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pcmpeqq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pcmpeqq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pcmpeqq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -916,42 +1588,84 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; SLM-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrb: ; SKX: # %bb.0: ; SKX-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:2.00] @@ -979,6 +1693,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SLM-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -986,6 +1707,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -993,6 +1721,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -1000,6 +1735,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; BROADWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] @@ -1007,6 +1749,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SKYLAKE-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrd: ; SKX: # %bb.0: ; SKX-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.33] @@ -1014,6 +1763,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50] @@ -1021,6 +1777,13 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; BTVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.25] @@ -1047,42 +1810,84 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; SLM-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] ; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrq: ; SKX: # %bb.0: ; SKX-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] ; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] ; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:2.00] @@ -1107,42 +1912,84 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; SLM-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [4:2.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pextrw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pextrw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pextrw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] +; HASWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] ; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pextrw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:1.00] +; BROADWELL-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pextrw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] ; BROADWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pextrw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pextrw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SKYLAKE-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pextrw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; SKX-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [2:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pextrw: ; SKX: # %bb.0: ; SKX-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] ; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pextrw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] ; BTVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pextrw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [2:2.00] +; ZNVER1-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:3.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pextrw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:2.00] @@ -1168,42 +2015,84 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; SLM-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_phminposuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_phminposuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_phminposuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_phminposuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_phminposuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_phminposuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_phminposuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_phminposuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_phminposuw: ; SKX: # %bb.0: ; SKX-NEXT: vphminposuw (%rdi), %xmm0 # sched: [10:0.50] ; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_phminposuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_phminposuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_phminposuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] @@ -1229,42 +2118,84 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; SLM-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrb: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -1289,42 +2220,84 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SLM-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] ; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; BROADWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKYLAKE-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrd: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] ; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] @@ -1351,6 +2324,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pinsrq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pinsrq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] @@ -1358,6 +2338,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pinsrq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; HASWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1365,6 +2352,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pinsrq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; BROADWELL-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pinsrq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1372,6 +2366,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pinsrq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; SKYLAKE-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pinsrq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1379,6 +2380,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pinsrq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:2.00] +; SKX-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pinsrq: ; SKX: # %bb.0: ; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] @@ -1386,6 +2394,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pinsrq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [6:1.00] +; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [6:1.00] @@ -1393,6 +2408,13 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pinsrq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pinsrq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [8:0.50] @@ -1419,42 +2441,84 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pmaxsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsb: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1480,42 +2544,84 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmaxsd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxsd: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1541,42 +2647,84 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmaxud (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxud: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxud: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxud: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxud: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxud: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxud: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxud: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxud: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxud: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxud: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxud: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxud: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1602,42 +2750,84 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pmaxuw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmaxuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmaxuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmaxuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmaxuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmaxuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmaxuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmaxuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmaxuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmaxuw: ; SKX: # %bb.0: ; SKX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmaxuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmaxuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmaxuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1663,42 +2853,84 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SLM-NEXT: pminsb (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsb: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsb: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsb: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsb: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsb: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsb: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsb: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsb: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsb: ; SKX: # %bb.0: ; SKX-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsb: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsb: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsb: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1724,42 +2956,84 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pminsd (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminsd: ; SKX: # %bb.0: ; SKX-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1785,42 +3059,84 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pminud (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminud: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminud: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminud: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminud: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminud: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminud: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminud: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminud: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminud: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminud: ; SKX: # %bb.0: ; SKX-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminud: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminud: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminud: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminud: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1846,42 +3162,84 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SLM-NEXT: pminuw (%rdi), %xmm0 # sched: [4:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pminuw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pminuw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pminuw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pminuw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pminuw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pminuw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pminuw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKYLAKE-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pminuw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; SKX-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pminuw: ; SKX: # %bb.0: ; SKX-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pminuw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pminuw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pminuw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] @@ -1910,6 +3268,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] @@ -1917,6 +3282,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1924,6 +3296,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1931,6 +3310,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1938,6 +3324,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbw: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] @@ -1945,6 +3338,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [6:1.00] @@ -1952,6 +3352,13 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [8:0.50] @@ -1982,6 +3389,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] @@ -1989,6 +3403,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -1996,6 +3417,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2003,6 +3431,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2010,6 +3445,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] @@ -2017,6 +3459,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [6:1.00] @@ -2024,6 +3473,13 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [8:0.50] @@ -2054,6 +3510,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxbq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] @@ -2061,6 +3524,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxbq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2068,6 +3538,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxbq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxbq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2075,6 +3552,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxbq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxbq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2082,6 +3566,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxbq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxbq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] @@ -2089,6 +3580,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxbq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [6:1.00] @@ -2096,6 +3594,13 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxbq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxbq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [8:0.50] @@ -2126,6 +3631,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] @@ -2133,6 +3645,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2140,6 +3659,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2147,6 +3673,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2154,6 +3687,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxdq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] @@ -2161,6 +3701,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [6:1.00] @@ -2168,6 +3715,13 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [8:0.50] @@ -2198,6 +3752,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] @@ -2205,6 +3766,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2212,6 +3780,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2219,6 +3794,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2226,6 +3808,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] @@ -2233,6 +3822,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [6:1.00] @@ -2240,6 +3836,13 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [8:0.50] @@ -2270,6 +3873,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovsxwq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; SANDY-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] @@ -2277,6 +3887,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovsxwq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2284,6 +3901,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovsxwq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovsxwq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2291,6 +3915,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovsxwq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovsxwq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2298,6 +3929,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovsxwq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:1.00] +; SKX-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovsxwq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] @@ -2305,6 +3943,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovsxwq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [6:1.00] @@ -2312,6 +3957,13 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovsxwq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovsxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [8:0.50] @@ -2342,6 +3994,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbw: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] @@ -2349,6 +4008,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbw: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2356,6 +4022,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbw: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbw: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2363,6 +4036,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbw: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbw: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2370,6 +4050,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbw: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbw: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] @@ -2377,6 +4064,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbw: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [6:1.00] @@ -2384,6 +4078,13 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbw: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbw: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [8:0.50] @@ -2414,6 +4115,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] @@ -2421,6 +4129,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2428,6 +4143,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2435,6 +4157,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2442,6 +4171,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] @@ -2449,6 +4185,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [6:1.00] @@ -2456,6 +4199,13 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [8:0.50] @@ -2486,6 +4236,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxbq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] @@ -2493,6 +4250,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxbq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2500,6 +4264,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxbq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxbq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2507,6 +4278,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxbq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxbq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2514,6 +4292,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxbq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxbq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] @@ -2521,6 +4306,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxbq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [6:1.00] @@ -2528,6 +4320,13 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxbq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxbq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [8:0.50] @@ -2558,6 +4357,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxdq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] @@ -2565,6 +4371,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxdq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2572,6 +4385,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxdq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxdq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2579,6 +4399,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxdq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxdq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2586,6 +4413,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxdq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxdq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] @@ -2593,6 +4427,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxdq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [6:1.00] @@ -2600,6 +4441,13 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxdq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxdq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [8:0.50] @@ -2630,6 +4478,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxwd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] @@ -2637,6 +4492,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxwd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2644,6 +4506,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxwd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxwd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2651,6 +4520,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxwd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2658,6 +4534,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxwd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxwd: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] @@ -2665,6 +4548,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxwd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [6:1.00] @@ -2672,6 +4562,13 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxwd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxwd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [8:0.50] @@ -2702,6 +4599,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmovzxwq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; SANDY-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] +; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] @@ -2709,6 +4613,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmovzxwq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; HASWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmovzxwq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2716,6 +4627,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmovzxwq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; BROADWELL-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmovzxwq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2723,6 +4641,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmovzxwq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; SKYLAKE-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmovzxwq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2730,6 +4655,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmovzxwq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] +; SKX-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmovzxwq: ; SKX: # %bb.0: ; SKX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] @@ -2737,6 +4669,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmovzxwq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] +; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [6:1.00] @@ -2744,6 +4683,13 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmovzxwq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.25] +; ZNVER1-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] +; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmovzxwq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [8:0.50] @@ -2771,42 +4717,84 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmuldq: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmuldq: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmuldq: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; HASWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmuldq: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00] +; BROADWELL-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmuldq: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; BROADWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmuldq: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmuldq: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmuldq: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [10:0.50] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmuldq: ; SKX: # %bb.0: ; SKX-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmuldq: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmuldq: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmuldq (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmuldq: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -2833,42 +4821,84 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SLM-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_pmulld: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00] +; SANDY-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_pmulld: ; SANDY: # %bb.0: ; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_pmulld: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] +; HASWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:2.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:2.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_pmulld: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:2.00] +; BROADWELL-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [15:2.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_pmulld: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; BROADWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [15:2.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_pmulld: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:1.00] +; SKYLAKE-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:1.00] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_pmulld: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:1.00] ; SKYLAKE-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_pmulld: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [10:0.67] +; SKX-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [16:0.67] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_pmulld: ; SKX: # %bb.0: ; SKX-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:0.67] ; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:0.67] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_pmulld: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [2:1.00] +; BTVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [7:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_pmulld: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] ; BTVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_pmulld: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] @@ -2901,6 +4931,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SLM-NEXT: movzbl %cl, %eax # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_ptest: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; SANDY-SSE-NEXT: setb %al # sched: [1:0.50] +; SANDY-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; SANDY-SSE-NEXT: setb %cl # sched: [1:0.50] +; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] +; SANDY-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_ptest: ; SANDY: # %bb.0: ; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2911,6 +4951,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_ptest: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; HASWELL-SSE-NEXT: setb %al # sched: [1:0.50] +; HASWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; HASWELL-SSE-NEXT: setb %cl # sched: [1:0.50] +; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; HASWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_ptest: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2921,6 +4971,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_ptest: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; BROADWELL-SSE-NEXT: setb %al # sched: [1:0.50] +; BROADWELL-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [7:1.00] +; BROADWELL-SSE-NEXT: setb %cl # sched: [1:0.50] +; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; BROADWELL-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_ptest: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] @@ -2931,6 +4991,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BROADWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_ptest: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; SKYLAKE-SSE-NEXT: setb %al # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] +; SKYLAKE-SSE-NEXT: setb %cl # sched: [1:0.50] +; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_ptest: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2941,6 +5011,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKYLAKE-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_ptest: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; SKX-SSE-NEXT: setb %al # sched: [1:0.50] +; SKX-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [9:1.00] +; SKX-SSE-NEXT: setb %cl # sched: [1:0.50] +; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; SKX-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_ptest: ; SKX: # %bb.0: ; SKX-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2951,6 +5031,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_ptest: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: setb %al # sched: [1:0.50] +; BTVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: setb %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] +; BTVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.50] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_ptest: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vptest %xmm1, %xmm0 # sched: [3:1.00] @@ -2961,6 +5051,16 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; BTVER2-NEXT: movzbl %cl, %eax # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_ptest: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [1:1.00] +; ZNVER1-SSE-NEXT: setb %al # sched: [1:0.25] +; ZNVER1-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; ZNVER1-SSE-NEXT: setb %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_ptest: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vptest %xmm1, %xmm0 # sched: [1:1.00] @@ -2994,6 +5094,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundpd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] @@ -3001,6 +5108,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundpd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:0.50] @@ -3008,6 +5122,14 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundpd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: roundpd $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundpd $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundpd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:2.00] @@ -3015,6 +5137,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundpd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundpd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:1.00] @@ -3022,6 +5151,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundpd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundpd: ; SKX: # %bb.0: ; SKX-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -3029,6 +5165,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundpd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -3036,6 +5179,13 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundpd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundpd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [11:1.00] @@ -3066,6 +5216,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundps: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundps: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] @@ -3073,6 +5230,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundps: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundps: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:0.50] @@ -3080,6 +5244,14 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundps: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: roundps $7, (%rdi), %xmm1 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundps $7, %xmm0, %xmm0 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundps: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:2.00] @@ -3087,6 +5259,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundps: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundps: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:1.00] @@ -3094,6 +5273,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundps: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [8:0.67] +; SKX-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundps: ; SKX: # %bb.0: ; SKX-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [8:0.67] @@ -3101,6 +5287,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundps: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; BTVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundps: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [8:1.00] @@ -3108,6 +5301,13 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundps: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundps: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [11:1.00] @@ -3139,6 +5339,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundsd: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; SANDY-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundsd: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3146,6 +5354,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundsd: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; HASWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] @@ -3153,6 +5369,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundsd: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundsd: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] @@ -3160,6 +5384,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BROADWELL-NEXT: vaddpd %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundsd: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundsd: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] @@ -3167,6 +5399,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundsd: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [8:0.67] +; SKX-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundsd: ; SKX: # %bb.0: ; SKX-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] @@ -3174,6 +5414,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundsd: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3181,6 +5429,14 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundsd: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundsd: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] @@ -3212,6 +5468,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SLM-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; +; SANDY-SSE-LABEL: test_roundss: +; SANDY-SSE: # %bb.0: +; SANDY-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; SANDY-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; SANDY-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] +; SANDY-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; SANDY-SSE-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundss: ; SANDY: # %bb.0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3219,6 +5483,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; +; HASWELL-SSE-LABEL: test_roundss: +; HASWELL-SSE: # %bb.0: +; HASWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; HASWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] +; HASWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [12:2.00] +; HASWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; HASWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; HASWELL-LABEL: test_roundss: ; HASWELL: # %bb.0: ; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:0.50] @@ -3226,6 +5498,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; +; BROADWELL-SSE-LABEL: test_roundss: +; BROADWELL-SSE: # %bb.0: +; BROADWELL-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:2.00] +; BROADWELL-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [6:0.50] +; BROADWELL-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] +; ; BROADWELL-LABEL: test_roundss: ; BROADWELL: # %bb.0: ; BROADWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm2 # sched: [11:2.00] @@ -3233,6 +5513,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BROADWELL-NEXT: vaddps %xmm2, %xmm0, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; +; SKYLAKE-SSE-LABEL: test_roundss: +; SKYLAKE-SSE: # %bb.0: +; SKYLAKE-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:1.00] +; SKYLAKE-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:1.00] +; SKYLAKE-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.50] +; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] +; ; SKYLAKE-LABEL: test_roundss: ; SKYLAKE: # %bb.0: ; SKYLAKE-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:1.00] @@ -3240,6 +5528,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; +; SKX-SSE-LABEL: test_roundss: +; SKX-SSE: # %bb.0: +; SKX-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [8:0.67] +; SKX-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [14:0.67] +; SKX-SSE-NEXT: addps %xmm2, %xmm0 # sched: [4:0.33] +; SKX-SSE-NEXT: retq # sched: [7:1.00] +; ; SKX-LABEL: test_roundss: ; SKX: # %bb.0: ; SKX-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [8:0.67] @@ -3247,6 +5543,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.33] ; SKX-NEXT: retq # sched: [7:1.00] ; +; BTVER2-SSE-LABEL: test_roundss: +; BTVER2-SSE: # %bb.0: +; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [8:1.00] +; BTVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; BTVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; BTVER2-SSE-NEXT: retq # sched: [4:1.00] +; ; BTVER2-LABEL: test_roundss: ; BTVER2: # %bb.0: ; BTVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] @@ -3254,6 +5558,14 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; BTVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; +; ZNVER1-SSE-LABEL: test_roundss: +; ZNVER1-SSE: # %bb.0: +; ZNVER1-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [11:1.00] +; ZNVER1-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [4:1.00] +; ZNVER1-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] +; ; ZNVER1-LABEL: test_roundss: ; ZNVER1: # %bb.0: ; ZNVER1-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [4:1.00] |