diff options
author | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-02-04 12:51:26 +0000 |
---|---|---|
committer | Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> | 2019-02-04 12:51:26 +0000 |
commit | edbf06a76771f77f70ad6ee1b4641a2d53c14152 (patch) | |
tree | 470ba327830e755baa528289fbc4a6d454f92f1e /llvm/test/CodeGen/X86/sse2-schedule.ll | |
parent | fb222aa31950e7338f17b73a9490657f53e703ae (diff) | |
download | bcm5719-llvm-edbf06a76771f77f70ad6ee1b4641a2d53c14152.tar.gz bcm5719-llvm-edbf06a76771f77f70ad6ee1b4641a2d53c14152.zip |
[AsmPrinter] Remove hidden flag -print-schedule.
This patch removes hidden codegen flag -print-schedule effectively reverting the
logic originally committed as r300311
(https://llvm.org/viewvc/llvm-project?view=revision&revision=300311).
Flag -print-schedule was originally introduced by r300311 to address PR32216
(https://bugs.llvm.org/show_bug.cgi?id=32216). That bug was about adding "Better
testing of schedule model instruction latencies/throughputs".
These days, we can use llvm-mca to test scheduling models. So there is no longer
a need for flag -print-schedule in LLVM. The main use case for PR32216 is
now addressed by llvm-mca.
Flag -print-schedule is mainly used for debugging purposes, and it is only
actually used by x86 specific tests. We already have extensive (latency and
throughput) tests under "test/tools/llvm-mca" for X86 processor models. That
means, most (if not all) existing -print-schedule tests for X86 are redundant.
When flag -print-schedule was first added to LLVM, several files had to be
modified; a few APIs gained new arguments (see for example method
MCAsmStreamer::EmitInstruction), and MCSubtargetInfo/TargetSubtargetInfo gained
a couple of getSchedInfoStr() methods.
Method getSchedInfoStr() originally had to work for both MCInst and
MachineInstr. The original implementation of getSchedInfoStr() introduced a
subtle layering violation (reported as PR37160 and then fixed/worked-around by
r330615).
In retrospect, that new API could have been designed better. We can
always query MCSchedModel to get the latency and throughput. More importantly,
the "sched-info" string should not have been generated by the subtarget.
Note, r317782 fixed an issue where "print-schedule" didn't work very well in the
presence of inline assembly. That commit is also reverted by this change.
Differential Revision: https://reviews.llvm.org/D57244
llvm-svn: 353043
Diffstat (limited to 'llvm/test/CodeGen/X86/sse2-schedule.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 16972 |
1 files changed, 0 insertions, 16972 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll deleted file mode 100644 index ffb650f833e..00000000000 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ /dev/null @@ -1,16972 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ATOM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SLM -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SANDY-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SANDY -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,HASWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,HASWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BROADWELL-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BROADWELL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKYLAKE-SSE -; RUN: llc < %s 
-mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 -mattr=-avx2,-xop | FileCheck %s --check-prefixes=CHECK,BDVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1 - -define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_addpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addpd: -; ATOM: # %bb.0: -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addpd: -; SLM: # %bb.0: -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; 
SANDY-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddpd (%rdi), %xmm0, 
%xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fadd <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_addsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_addsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_addsd: -; ATOM: # %bb.0: -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: addsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_addsd: -; 
SLM: # %bb.0: -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: addsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_addsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_addsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_addsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_addsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_addsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_addsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_addsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_addsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_addsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addsd 
%xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_addsd: -; SKX: # %bb.0: -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_addsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_addsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_addsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_addsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_addsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: addsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_addsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fadd double %1, %2 - ret double %3 -} - -define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_andpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; 
GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andpd: -; ATOM: # %bb.0: -; ATOM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andpd: -; SLM: # %bb.0: -; SLM-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andpd: -; 
BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andpd: -; SKX: # %bb.0: -; SKX-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.50] 
-; BTVER2-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = and <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = and <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_andnotpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_andnotpd: -; ATOM: # %bb.0: -; ATOM-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: andnpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_andnotpd: -; SLM: # %bb.0: -; SLM-NEXT: andnpd 
%xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: andnpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_andnotpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_andnotpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_andnotpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_andnotpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_andnotpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_andnotpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_andnotpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: 
[1:0.33] -; SKYLAKE-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_andnotpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_andnotpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_andnotpd: -; SKX: # %bb.0: -; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_andnotpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_andnotpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_andnotpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_andnotpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: 
[6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_andnotpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: andnpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andnpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_andnotpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1> - %4 = and <4 x i32> %3, %2 - %5 = load <2 x double>, <2 x double> *%a2, align 16 - %6 = bitcast <2 x double> %5 to <4 x i32> - %7 = xor <4 x i32> %4, <i32 -1, i32 -1, i32 -1, i32 -1> - %8 = and <4 x i32> %6, %7 - %9 = bitcast <4 x i32> %8 to <2 x double> - %10 = fadd <2 x double> %a1, %9 - ret <2 x double> %10 -} - -define void @test_clflush(i8* %p){ -; GENERIC-LABEL: test_clflush: -; GENERIC: # %bb.0: -; GENERIC-NEXT: clflush (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_clflush: -; ATOM: # %bb.0: -; ATOM-NEXT: clflush (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_clflush: -; SLM: # %bb.0: -; SLM-NEXT: clflush (%rdi) # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_clflush: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: clflush (%rdi) # sched: [5:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_clflush: -; SANDY: # 
%bb.0: -; SANDY-NEXT: clflush (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_clflush: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_clflush: -; HASWELL: # %bb.0: -; HASWELL-NEXT: clflush (%rdi) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_clflush: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_clflush: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: clflush (%rdi) # sched: [2:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_clflush: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_clflush: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_clflush: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_clflush: -; SKX: # %bb.0: -; SKX-NEXT: clflush (%rdi) # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_clflush: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: clflush (%rdi) # sched: [5:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_clflush: -; BDVER2: # %bb.0: -; BDVER2-NEXT: clflush (%rdi) # sched: [5:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_clflush: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: clflush (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_clflush: -; BTVER2: # %bb.0: -; BTVER2-NEXT: clflush (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_clflush: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: clflush 
(%rdi) # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_clflush: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: clflush (%rdi) # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - tail call void @llvm.x86.sse2.clflush(i8* %p) - ret void -} -declare void @llvm.x86.sse2.clflush(i8*) nounwind - -define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_cmppd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmppd: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmppd: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmppd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmppd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmppd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; 
HASWELL-LABEL: test_cmppd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmppd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmppd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmppd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmppd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmppd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmppd: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [4:0.50] -; SKX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmppd: 
-; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmppd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmppd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmppd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmppd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmppd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fcmp oeq <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fcmp oeq <2 x double> %a0, %2 - %4 = or <2 x i1> %1, %3 - %5 = sext <2 x i1> %4 to <2 x i64> - %6 = bitcast <2 x i64> %5 to <2 x double> - ret <2 x double> %6 -} - -define double @test_cmpsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_cmpsd: -; 
GENERIC: # %bb.0: -; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cmpsd: -; ATOM: # %bb.0: -; ATOM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cmpsd: -; SLM: # %bb.0: -; SLM-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cmpsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cmpsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cmpsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cmpsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cmpsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cmpsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cmpsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cmpeqsd %xmm1, 
%xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cmpsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cmpsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cmpsd: -; SKX: # %bb.0: -; SKX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cmpsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cmpsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cmpsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cmpsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cmpsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cmpsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: 
vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = insertelement <2 x double> undef, double %a1, i32 0 - %3 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %1, <2 x double> %2, i8 0) - %4 = load double, double *%a2, align 8 - %5 = insertelement <2 x double> undef, double %4, i32 0 - %6 = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %3, <2 x double> %5, i8 0) - %7 = extractelement <2 x double> %6, i32 0 - ret double %7 -} -declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone - -define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_comisd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_comisd: -; ATOM: # %bb.0: -; ATOM-NEXT: comisd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: comisd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_comisd: -; SLM: # %bb.0: -; SLM-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: 
sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_comisd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_comisd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_comisd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb 
%al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_comisd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_comisd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_comisd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; 
BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_comisd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_comisd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_comisd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_comisd: -; SKX: # %bb.0: -; SKX-NEXT: vcomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete 
%cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_comisd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_comisd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_comisd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; 
BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_comisd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vcomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_comisd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: comisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_comisd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vcomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 
@llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 8 - %3 = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtdq2pd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtdq2pd: -; SLM: # %bb.0: -; SLM-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtdq2pd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtdq2pd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; 
HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtdq2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtdq2pd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtdq2pd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtdq2pd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2pd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [11:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, 
%xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtdq2pd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtdq2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtdq2pd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtdq2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtdq2pd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtdq2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - %2 = sitofp <2 x i32> %1 to <2 x double> - %3 = load <4 x i32>, <4 x i32>*%a1, align 16 - %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <2 x i32> <i32 0, i32 1> - %5 = sitofp <2 x i32> %4 
to <2 x double> - %6 = fadd <2 x double> %2, %5 - ret <2 x double> %6 -} - -define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_cvtdq2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtdq2ps: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtdq2ps (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvtdq2ps %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtdq2ps: -; SLM: # %bb.0: -; SLM-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtdq2ps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtdq2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtdq2ps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtdq2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 
# sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtdq2ps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtdq2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtdq2ps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtdq2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtdq2ps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtdq2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtdq2ps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: 
retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtdq2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtdq2ps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtdq2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtdq2ps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtdq2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp <4 x i32> %a0 to <4 x float> - %2 = load <4 x i32>, <4 x i32>*%a1, align 16 - %3 = sitofp <4 x i32> %2 to <4 x float> - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} - -define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpd2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: 
cvtpd2dq (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpd2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtpd2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtpd2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtpd2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtpd2dq (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2dq: -; 
BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtpd2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtpd2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtpd2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtpd2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # 
sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtpd2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %2) - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone - -define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvtpd2ps: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtpd2ps: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtpd2ps: -; SLM: # 
%bb.0: -; SLM-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtpd2ps: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtpd2ps: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtpd2ps: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtpd2ps: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtpd2ps: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtpd2ps (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtpd2ps: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: 
test_cvtpd2ps: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtpd2ps: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtpd2ps: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtpd2ps: -; SKX: # %bb.0: -; SKX-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] -; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtpd2ps: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtpd2ps: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtpd2ps: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtpd2ps: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtpd2psx (%rdi), %xmm1 
# sched: [8:1.00] -; BTVER2-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtpd2ps: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtpd2ps: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [11:1.00] -; ZNVER1-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %2) - %4 = fadd <4 x float> %1, %3 - ret <4 x float> %4 -} -declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone - -define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvtps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtps2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtps2dq (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvtps2dq %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtps2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtps2dq: -; SANDY-SSE: # %bb.0: -; 
SANDY-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtps2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtps2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtps2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: 
vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtps2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtps2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtps2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtps2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvtps2dq (%rdi), %xmm0 # 
sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %2) - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone - -define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvtps2pd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtps2pd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtps2pd (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvtps2pd %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtps2pd: -; SLM: # %bb.0: -; SLM-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtps2pd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; SANDY-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtps2pd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # 
sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtps2pd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; HASWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtps2pd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtps2pd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtps2pd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtps2pd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtps2pd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtps2pd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: 
[9:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtps2pd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [9:0.50] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtps2pd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtps2pd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtps2pd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] -; BTVER2-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtps2pd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] -; BTVER2-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtps2pd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtps2pd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [10:1.00] -; ZNVER1-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: 
retq # sched: [1:0.50] - %1 = shufflevector <4 x float> %a0, <4 x float> undef, <2 x i32> <i32 0, i32 1> - %2 = fpext <2 x float> %1 to <2 x double> - %3 = load <4 x float>, <4 x float> *%a1, align 16 - %4 = shufflevector <4 x float> %3, <4 x float> undef, <2 x i32> <i32 0, i32 1> - %5 = fpext <2 x float> %4 to <2 x double> - %6 = fadd <2 x double> %2, %5 - ret <2 x double> %6 -} - -define i32 @test_cvtsd2si(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsd2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvtsd2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2si: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; 
HASWELL-LABEL: test_cvtsd2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsd2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: 
[18:1.00] -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2si: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvtsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %1) - %3 = load double, double *%a1, align 8 - %4 = insertelement <2 x double> undef, double %3, i32 0 - %5 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %4) - %6 = add i32 %2, %5 - ret i32 %6 -} -declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone - -define i64 @test_cvtsd2siq(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2siq: -; GENERIC: # %bb.0: -; 
GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsd2si (%rdi), %rax # sched: [9:4.50] -; ATOM-NEXT: cvtsd2si %xmm0, %rcx # sched: [8:4.00] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsd2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # 
sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2siq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvtsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsd2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2siq: -; BTVER2-SSE: # %bb.0: 
-; BTVER2-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvtsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x double> undef, double %a0, i32 0 - %2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %1) - %3 = load double, double *%a1, align 8 - %4 = insertelement <2 x double> undef, double %3, i32 0 - %5 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %4) - %6 = add i64 %2, %5 - ret i64 %6 -} -declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone - -define float @test_cvtsd2ss(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvtsd2ss: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsd2ss: -; ATOM: # %bb.0: -; ATOM-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: cvtsd2ss %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 # sched: 
[1:0.50] -; ATOM-NEXT: cvtsd2ss %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addss %xmm2, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsd2ss: -; SLM: # %bb.0: -; SLM-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:0.50] -; SLM-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsd2ss: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsd2ss: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsd2ss: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsd2ss: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsd2ss: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; 
BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsd2ss: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BROADWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsd2ss: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsd2ss: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKYLAKE-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsd2ss: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addss %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsd2ss: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; 
BDVER2-SSE-LABEL: test_cvtsd2ss: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsd2ss: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsd2ss: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsd2ss: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [7:2.00] -; BTVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsd2ss: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsd2ss: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: 
[4:1.00] -; ZNVER1-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptrunc double %a0 to float - %2 = load double, double *%a1, align 8 - %3 = fptrunc double %2 to float - %4 = fadd float %1, %3 - ret float %4 -} - -define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { -; GENERIC-LABEL: test_cvtsi2sd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2sd: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2sd: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2sd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2sd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2sd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2sd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: 
vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2sd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2sd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2sd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2sd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2sd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2sd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2sd: -; BDVER2-SSE: # 
%bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [14:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2sd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2sd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [10:1.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2sd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [10:1.00] -; BTVER2-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2sd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2sd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i32 %a0 to double - %2 = load i32, i32 *%a1, align 8 - %3 = sitofp i32 %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { -; GENERIC-LABEL: test_cvtsi2sdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvtsi2sdq 
(%rsi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtsi2sdq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtsi2sdq: -; SLM: # %bb.0: -; SLM-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtsi2sdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtsi2sdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtsi2sdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtsi2sdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtsi2sdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, 
%xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtsi2sdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtsi2sdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtsi2sdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtsi2sdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtsi2sdq: -; SKX: # %bb.0: -; SKX-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtsi2sdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [13:1.00] -; BDVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtsi2sdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [14:1.00] -; BDVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vaddsd %xmm1, 
%xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtsi2sdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] -; BTVER2-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [10:1.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtsi2sdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [10:1.00] -; BTVER2-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtsi2sdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtsi2sdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sitofp i64 %a0 to double - %2 = load i64, i64 *%a1, align 8 - %3 = sitofp i64 %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -; TODO - cvtss2sd_m - -define double @test_cvtss2sd(float %a0, float *%a1) { -; GENERIC-LABEL: test_cvtss2sd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvtss2sd: -; ATOM: # %bb.0: -; ATOM-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: cvtss2sd %xmm0, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: xorps %xmm0, %xmm0 # 
sched: [1:0.50] -; ATOM-NEXT: cvtss2sd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: addsd %xmm2, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvtss2sd: -; SLM: # %bb.0: -; SLM-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:0.50] -; SLM-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvtss2sd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvtss2sd: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvtss2sd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] -; HASWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvtss2sd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvtss2sd: -; BROADWELL-SSE: # %bb.0: -; 
BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvtss2sd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [2:1.00] -; BROADWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvtss2sd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvtss2sd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [5:1.00] -; SKYLAKE-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvtss2sd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [5:1.00] -; SKX-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvtss2sd: -; SKX: # %bb.0: -; SKX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 
# sched: [5:1.00] -; SKX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvtss2sd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvtss2sd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; BDVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvtss2sd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [7:2.00] -; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvtss2sd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [7:2.00] -; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [7:2.00] -; BTVER2-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvtss2sd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvtss2sd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovss 
{{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [4:1.00] -; ZNVER1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fpext float %a0 to double - %2 = load float, float *%a1, align 4 - %3 = fpext float %2 to double - %4 = fadd double %1, %3 - ret double %4 -} - -define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_cvttpd2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttpd2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [8:4.00] -; ATOM-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [7:3.50] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttpd2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttpd2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] -; SANDY-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttpd2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttpd2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: 
[4:1.00] -; HASWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttpd2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttpd2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttpd2dq (%rdi), %xmm1 # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttpd2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; BROADWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttpd2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKYLAKE-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttpd2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKYLAKE-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttpd2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; SKX-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [11:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttpd2dq: -; SKX: # 
%bb.0: -; SKX-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; SKX-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttpd2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [8:1.00] -; BDVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [13:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttpd2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [13:1.00] -; BDVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [8:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttpd2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttpd2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttpd2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttpd2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <2 x double> %a0 to <2 x i32> - %2 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 
x i32> <i32 0, i32 1, i32 2, i32 3> - %3 = load <2 x double>, <2 x double> *%a1, align 16 - %4 = fptosi <2 x double> %3 to <2 x i32> - %5 = shufflevector <2 x i32> %4, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> - %6 = add <4 x i32> %2, %5 - ret <4 x i32> %6 -} - -define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { -; GENERIC-LABEL: test_cvttps2dq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttps2dq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttps2dq (%rdi), %xmm1 # sched: [7:3.50] -; ATOM-NEXT: cvttps2dq %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttps2dq: -; SLM: # %bb.0: -; SLM-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SLM-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttps2dq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttps2dq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttps2dq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: 
[7:1.00] -; -; HASWELL-LABEL: test_cvttps2dq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttps2dq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttps2dq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttps2dq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttps2dq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttps2dq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttps2dq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [10:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; 
BDVER2-SSE-LABEL: test_cvttps2dq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [4:1.00] -; BDVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttps2dq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] -; BDVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttps2dq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttps2dq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [8:1.00] -; BTVER2-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttps2dq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [5:1.00] -; ZNVER1-SSE-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [12:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttps2dq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [12:1.00] -; ZNVER1-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [5:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi <4 x float> %a0 to <4 x i32> - %2 = load <4 x float>, <4 x float> *%a1, align 16 - %3 = fptosi <4 x float> %2 to <4 x i32> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define i32 @test_cvttsd2si(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvttsd2si: -; GENERIC: # %bb.0: -; GENERIC-NEXT: 
cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttsd2si: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttsd2si (%rdi), %eax # sched: [9:4.50] -; ATOM-NEXT: cvttsd2si %xmm0, %ecx # sched: [8:4.00] -; ATOM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttsd2si: -; SLM: # %bb.0: -; SLM-NEXT: cvttsd2si (%rdi), %eax # sched: [7:1.00] -; SLM-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:0.50] -; SLM-NEXT: addl %ecx, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_cvttsd2si: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; SANDY-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttsd2si: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [10:1.00] -; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttsd2si: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttsd2si: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] -; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttsd2si: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addl %ecx, %eax # 
sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttsd2si: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; BROADWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttsd2si: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttsd2si: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttsd2si: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttsd2si: -; SKX: # %bb.0: -; SKX-NEXT: vcvttsd2si %xmm0, %ecx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %eax # sched: [11:1.00] -; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttsd2si: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttsd2si: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [18:1.00] -; BDVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [13:1.00] -; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttsd2si: -; 
BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttsd2si: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] -; BTVER2-NEXT: vcvttsd2si %xmm0, %ecx # sched: [7:1.00] -; BTVER2-NEXT: addl %ecx, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttsd2si: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-SSE-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttsd2si: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttsd2si (%rdi), %eax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] -; ZNVER1-NEXT: addl %ecx, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi double %a0 to i32 - %2 = load double, double *%a1, align 8 - %3 = fptosi double %2 to i32 - %4 = add i32 %1, %3 - ret i32 %4 -} - -define i64 @test_cvttsd2siq(double %a0, double *%a1) { -; GENERIC-LABEL: test_cvttsd2siq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; GENERIC-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_cvttsd2siq: -; ATOM: # %bb.0: -; ATOM-NEXT: cvttsd2si (%rdi), %rax # sched: [9:4.50] -; ATOM-NEXT: cvttsd2si %xmm0, %rcx # sched: [8:4.00] -; ATOM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_cvttsd2siq: -; SLM: # %bb.0: -; SLM-NEXT: cvttsd2si (%rdi), %rax # sched: [7:1.00] -; SLM-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:0.50] -; SLM-NEXT: addq %rcx, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; 
-; SANDY-SSE-LABEL: test_cvttsd2siq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; SANDY-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_cvttsd2siq: -; SANDY: # %bb.0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] -; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_cvttsd2siq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_cvttsd2siq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] -; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_cvttsd2siq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_cvttsd2siq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [9:1.00] -; BROADWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; BROADWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_cvttsd2siq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_cvttsd2siq: -; 
SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKYLAKE-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_cvttsd2siq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_cvttsd2siq: -; SKX: # %bb.0: -; SKX-NEXT: vcvttsd2si %xmm0, %rcx # sched: [6:1.00] -; SKX-NEXT: vcvttsd2si (%rdi), %rax # sched: [11:1.00] -; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_cvttsd2siq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_cvttsd2siq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [18:1.00] -; BDVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [13:1.00] -; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_cvttsd2siq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-SSE-NEXT: cvttsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_cvttsd2siq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] -; BTVER2-NEXT: vcvttsd2si %xmm0, %rcx # sched: [7:1.00] -; BTVER2-NEXT: addq %rcx, %rax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_cvttsd2siq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: cvttsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-SSE-NEXT: cvttsd2si %xmm0, %rcx 
# sched: [5:1.00] -; ZNVER1-SSE-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_cvttsd2siq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vcvttsd2si (%rdi), %rax # sched: [12:1.00] -; ZNVER1-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; ZNVER1-NEXT: addq %rcx, %rax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fptosi double %a0 to i64 - %2 = load double, double *%a1, align 8 - %3 = fptosi double %2 to i64 - %4 = add i64 %1, %3 - ret i64 %4 -} - -define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_divpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] -; GENERIC-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divpd: -; ATOM: # %bb.0: -; ATOM-NEXT: divpd %xmm1, %xmm0 # sched: [125:62.50] -; ATOM-NEXT: divpd (%rdi), %xmm0 # sched: [125:62.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divpd: -; SLM: # %bb.0: -; SLM-NEXT: divpd %xmm1, %xmm0 # sched: [69:69.00] -; SLM-NEXT: divpd (%rdi), %xmm0 # sched: [72:69.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [22:22.00] -; SANDY-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [28:22.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [20:14.00] -; HASWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [26:14.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] -; HASWELL-NEXT: vdivpd 
(%rdi), %xmm0, %xmm0 # sched: [26:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:8.00] -; BROADWELL-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [19:8.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:8.00] -; BROADWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] -; SKYLAKE-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKYLAKE-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [14:3.00] -; SKX-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [20:4.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divpd: -; SKX: # %bb.0: -; SKX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKX-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [20:4.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divpd (%rdi), %xmm0 # 
sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divpd %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divpd (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fdiv <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_divsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_divsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] -; GENERIC-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_divsd: -; ATOM: # %bb.0: -; ATOM-NEXT: divsd %xmm1, %xmm0 # sched: [62:31.00] -; ATOM-NEXT: divsd (%rdi), %xmm0 # sched: [62:31.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_divsd: -; SLM: # %bb.0: -; SLM-NEXT: divsd %xmm1, %xmm0 # sched: [34:32.00] -; SLM-NEXT: divsd (%rdi), %xmm0 # sched: [37:32.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_divsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [22:22.00] -; SANDY-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [28:22.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_divsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:22.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:22.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_divsd: -; HASWELL-SSE: # 
%bb.0: -; HASWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [20:14.00] -; HASWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [25:14.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_divsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [20:14.00] -; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [25:14.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_divsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:4.00] -; BROADWELL-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:8.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_divsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:4.00] -; BROADWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:8.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_divsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] -; SKYLAKE-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_divsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKYLAKE-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_divsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [14:3.00] -; SKX-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [19:4.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_divsd: -; SKX: # %bb.0: -; SKX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [14:3.00] -; SKX-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_divsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [9:9.50] -; BDVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [14:9.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_divsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: 
vdivsd %xmm1, %xmm0, %xmm0 # sched: [9:9.50] -; BDVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [14:9.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_divsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [19:19.00] -; BTVER2-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [24:19.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_divsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:19.00] -; BTVER2-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [24:19.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_divsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: divsd %xmm1, %xmm0 # sched: [15:1.00] -; ZNVER1-SSE-NEXT: divsd (%rdi), %xmm0 # sched: [22:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_divsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [15:1.00] -; ZNVER1-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [22:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fdiv double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fdiv double %1, %2 - ret double %3 -} - -define void @test_lfence() { -; GENERIC-LABEL: test_lfence: -; GENERIC: # %bb.0: -; GENERIC-NEXT: lfence # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_lfence: -; ATOM: # %bb.0: -; ATOM-NEXT: lfence # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_lfence: -; SLM: # %bb.0: -; SLM-NEXT: lfence # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_lfence: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: lfence # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_lfence: -; SANDY: # %bb.0: -; SANDY-NEXT: lfence # sched: 
[1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_lfence: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: lfence # sched: [2:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_lfence: -; HASWELL: # %bb.0: -; HASWELL-NEXT: lfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_lfence: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: lfence # sched: [2:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_lfence: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: lfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_lfence: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: lfence # sched: [2:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_lfence: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: lfence # sched: [2:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_lfence: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: lfence # sched: [2:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_lfence: -; SKX: # %bb.0: -; SKX-NEXT: lfence # sched: [2:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_lfence: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: lfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_lfence: -; BDVER2: # %bb.0: -; BDVER2-NEXT: lfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_lfence: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: lfence # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_lfence: -; BTVER2: # %bb.0: -; BTVER2-NEXT: lfence # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_lfence: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: lfence # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_lfence: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: lfence # sched: [1:0.50] 
-; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.lfence() - ret void -} -declare void @llvm.x86.sse2.lfence() nounwind readnone - -define void @test_mfence() { -; GENERIC-LABEL: test_mfence: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mfence # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mfence: -; ATOM: # %bb.0: -; ATOM-NEXT: mfence # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mfence: -; SLM: # %bb.0: -; SLM-NEXT: mfence # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mfence: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mfence # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mfence: -; SANDY: # %bb.0: -; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mfence: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mfence # sched: [2:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mfence: -; HASWELL: # %bb.0: -; HASWELL-NEXT: mfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mfence: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mfence # sched: [2:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mfence: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: mfence # sched: [2:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mfence: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mfence # sched: [3:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mfence: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: mfence # sched: [3:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mfence: -; SKX-SSE: # %bb.0: -; 
SKX-SSE-NEXT: mfence # sched: [3:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mfence: -; SKX: # %bb.0: -; SKX-NEXT: mfence # sched: [3:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mfence: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mfence # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mfence: -; BDVER2: # %bb.0: -; BDVER2-NEXT: mfence # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mfence: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mfence # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mfence: -; BTVER2: # %bb.0: -; BTVER2-NEXT: mfence # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mfence: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mfence # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mfence: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: mfence # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.mfence() - ret void -} -declare void @llvm.x86.sse2.mfence() nounwind readnone - -define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { -; GENERIC-LABEL: test_maskmovdqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maskmovdqu: -; ATOM: # %bb.0: -; ATOM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maskmovdqu: -; SLM: # %bb.0: -; SLM-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maskmovdqu: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: 
test_maskmovdqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maskmovdqu: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maskmovdqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maskmovdqu: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maskmovdqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maskmovdqu: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maskmovdqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maskmovdqu: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maskmovdqu: -; SKX: # %bb.0: -; SKX-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maskmovdqu: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maskmovdqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maskmovdqu: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maskmovdqu: -; BTVER2: # %bb.0: 
-; BTVER2-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maskmovdqu: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maskmovdqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [100:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) - ret void -} -declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind - -define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_maxpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxpd: -; ATOM: # %bb.0: -; ATOM-NEXT: maxpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: maxpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxpd: -; SLM: # %bb.0: -; SLM-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, 
%xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxpd: -; SKX: # %bb.0: -; SKX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [2:1.00] -; 
BTVER2-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_maxsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_maxsd: -; ATOM: # %bb.0: -; ATOM-NEXT: maxsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: maxsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_maxsd: -; SLM: # %bb.0: -; SLM-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: maxsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_maxsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_maxsd: -; SANDY: # 
%bb.0: -; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_maxsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_maxsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_maxsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_maxsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_maxsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_maxsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_maxsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_maxsd: -; SKX: # %bb.0: -; SKX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_maxsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: maxsd 
%xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_maxsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_maxsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_maxsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_maxsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: maxsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_maxsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_minpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minpd: -; ATOM: # %bb.0: -; ATOM-NEXT: minpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: minpd (%rdi), %xmm0 # sched: [7:3.50] -; 
ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minpd: -; SLM: # %bb.0: -; SLM-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; 
SKX-SSE-LABEL: test_minpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minpd: -; SKX: # %bb.0: -; SKX-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 
x double>) nounwind readnone - -define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_minsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_minsd: -; ATOM: # %bb.0: -; ATOM-NEXT: minsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: minsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_minsd: -; SLM: # %bb.0: -; SLM-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: minsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_minsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_minsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_minsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_minsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_minsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_minsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: 
retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_minsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_minsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_minsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_minsd: -; SKX: # %bb.0: -; SKX-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_minsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] -; BDVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_minsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BDVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_minsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_minsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_minsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: minsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_minsd: -; ZNVER1: # 
%bb.0: -; ZNVER1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %1, <2 x double> %2) - ret <2 x double> %3 -} -declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone - -define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movapd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movapd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movapd: -; SLM: # %bb.0: -; SLM-NEXT: movapd (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movapd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movapd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movapd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addpd %xmm0, 
%xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movapd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movapd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movapd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movapd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movapd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movapd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movapd: -; SKX: # %bb.0: -; SKX-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovapd %xmm0, (%rsi) # sched: 
[1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movapd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movapd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movapd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movapd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movapd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movapd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movapd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovapd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x double>, <2 x double> *%a0, align 16 - %2 = fadd <2 x double> %1, %1 - store <2 x double> %2, <2 x double> *%a1, align 16 - ret void -} - -define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] 
-; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movdqa: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movdqa: -; SLM: # %bb.0: -; SLM-NEXT: movdqa (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movdqa: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movdqa: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movdqa: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movdqa: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq 
# sched: [7:1.00] -; -; BROADWELL-LABEL: test_movdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movdqa: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movdqa: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movdqa: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movdqa: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movdqa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movdqa: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa 
(%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movdqa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovdqa (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movdqa: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovdqa (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a0, align 16 - %2 = add <2 x i64> %1, %1 - store <2 x i64> %2, <2 x i64> *%a1, align 16 - ret void -} - -define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movdqu: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movdqu: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movdqu %xmm0, (%rsi) # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movdqu: -; SLM: # %bb.0: -; SLM-NEXT: movdqu (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movdqu: -; SANDY-SSE: # %bb.0: 
-; SANDY-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movdqu: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movdqu: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movdqu: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movdqu: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movdqu: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movdqu: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movdqu: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovdqu (%rdi), %xmm0 # sched: 
[6:0.50] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movdqu: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movdqu: -; SKX: # %bb.0: -; SKX-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movdqu: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movdqu: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movdqu: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movdqu: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovdqu (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movdqu: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqu %xmm0, (%rsi) # sched: [1:0.50] 
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movdqu: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovdqu (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x i64>, <2 x i64> *%a0, align 1 - %2 = add <2 x i64> %1, %1 - store <2 x i64> %2, <2 x i64> *%a1, align 1 - ret void -} - -define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { -; GENERIC-LABEL: test_movd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; GENERIC-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movd: -; ATOM: # %bb.0: -; ATOM-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [1:1.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movd %xmm1, %eax # sched: [3:3.00] -; ATOM-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; ATOM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movd: -; SLM: # %bb.0: -; SLM-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [3:1.00] -; SLM-NEXT: movd %edi, %xmm1 # sched: [1:0.50] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: movd %xmm2, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; 
SANDY-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] -; HASWELL-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movd %xmm2, %eax # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movd %xmm1, (%rsi) # 
sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; BROADWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movd %edi, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movd: -; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] -; 
SKX-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [10:0.50] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movd %xmm2, %eax # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovd %edi, %xmm1 # sched: [10:0.50] -; BDVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovd %xmm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [8:0.50] -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [4:1.00] -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [8:0.50] -; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; 
BTVER2-NEXT: vmovd %xmm0, %eax # sched: [4:1.00] -; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: movd %edi, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movd %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movd %xmm2, %eax # sched: [2:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [8:0.50] -; ZNVER1-NEXT: vmovd %edi, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vmovd %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <4 x i32> undef, i32 %a1, i32 0 - %2 = load i32, i32 *%a2 - %3 = insertelement <4 x i32> undef, i32 %2, i32 0 - %4 = add <4 x i32> %a0, %1 - %5 = add <4 x i32> %a0, %3 - %6 = extractelement <4 x i32> %4, i32 0 - %7 = extractelement <4 x i32> %5, i32 0 - store i32 %6, i32* %a2 - ret i32 %7 -} - -define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { -; GENERIC-LABEL: test_movd_64: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movd_64: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: movq %rdi, %xmm2 # 
sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm2 # sched: [2:1.00] -; ATOM-NEXT: movq %xmm1, %rax # sched: [3:3.00] -; ATOM-NEXT: movq %xmm2, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movd_64: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SLM-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: movq %xmm2, %rax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movd_64: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SANDY-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movd_64: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movd_64: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] -; HASWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; 
HASWELL-LABEL: test_movd_64: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movd_64: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movq %xmm2, %rax # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movd_64: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; BROADWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; BROADWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movd_64: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movd_64: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm2 
= mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKYLAKE-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movd_64: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] -; SKX-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; SKX-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movd_64: -; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SKX-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SKX-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movd_64: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [10:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm2, %rax # sched: [10:1.00] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movd_64: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq %rdi, %xmm1 # sched: [10:0.50] -; BDVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovq %xmm0, %rax # sched: [10:1.00] -; BDVER2-NEXT: vmovq %xmm1, (%rsi) # sched: 
[2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movd_64: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [8:0.50] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [4:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movd_64: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [8:0.50] -; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [4:1.00] -; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movd_64: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movd_64: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vmovq %rdi, %xmm1 # sched: [3:1.00] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm1, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <2 x i64> undef, i64 %a1, i64 0 - %2 = load i64, i64 *%a2 - %3 = insertelement <2 x i64> undef, i64 %2, i64 0 - %4 = add <2 x i64> %a0, %1 - 
%5 = add <2 x i64> %a0, %3 - %6 = extractelement <2 x i64> %4, i64 0 - %7 = extractelement <2 x i64> %5, i64 0 - store i64 %6, i64* %a2 - ret i64 %7 -} - -define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movhpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movhpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movhpd %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movhpd: -; SLM: # %bb.0: -; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movhpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movhpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movhpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movhpd 
{{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movhpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movhpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movhpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movhpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movhpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] 
-; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movhpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movhpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movhpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movhpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movhpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movhpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovapd %xmm1, 
%xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movhpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movhpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to double* - %2 = load double, double *%1, align 8 - %3 = insertelement <2 x double> %a1, double %2, i32 1 - %4 = fadd <2 x double> %a0, %3 - %5 = extractelement <2 x double> %4, i32 1 - store double %5, double* %1 - ret <2 x double> %3 -} - -define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { -; GENERIC-LABEL: test_movlpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movlpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movlpd %xmm2, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movlpd: -; SLM: # %bb.0: -; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movlpd %xmm0, (%rdi) # 
sched: [1:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movlpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movlpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movlpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movlpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movlpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movlpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; 
BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movlpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movlpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movlpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movlpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movlpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movlpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlpd 
{{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movlpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movlpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movlpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movlpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast x86_mmx* %a2 to double* - %2 = load double, double *%1, align 8 - %3 = insertelement <2 x double> %a1, double %2, i32 0 - %4 = fadd <2 x double> %a0, %3 - %5 = extractelement <2 x double> %4, i32 0 - store double %5, double* %1 - ret <2 x double> %3 -} - -define i32 @test_movmskpd(<2 x double> %a0) { -; GENERIC-LABEL: test_movmskpd: -; GENERIC: # 
%bb.0: -; GENERIC-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movmskpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movmskpd %xmm0, %eax # sched: [3:3.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movmskpd: -; SLM: # %bb.0: -; SLM-NEXT: movmskpd %xmm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movmskpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movmskpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movmskpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movmskpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movmskpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movmskpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movmskpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movmskpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movmskpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movmskpd: -; SKX: # %bb.0: -; SKX-NEXT: vmovmskpd %xmm0, %eax # sched: 
[2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movmskpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movmskpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movmskpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movmskpd %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movmskpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movmskpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movmskpd %xmm0, %eax # sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movmskpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovmskpd %xmm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone - -define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { -; GENERIC-LABEL: test_movntdqa: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntdqa: -; ATOM: # %bb.0: -; ATOM-NEXT: paddq %xmm0, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntdqa: -; SLM: # %bb.0: -; SLM-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntdqa: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: 
movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntdqa: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntdqa: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntdqa: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntdqa: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntdqa: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntdqa: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntdqa: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntdqa: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntdqa: -; SKX: # %bb.0: -; SKX-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovntdq %xmm0, (%rdi) # 
sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntdqa: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntdqa: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntdqa: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntdqa: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovntdq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntdqa: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movntdq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntdqa: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <2 x i64> %a0, %a0 - store <2 x i64> %1, <2 x i64> *%a1, align 16, !nontemporal !0 - ret void -} - -define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movntpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movntpd: -; ATOM: # %bb.0: -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movntpd: -; SLM: # %bb.0: -; SLM-NEXT: addpd 
%xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movntpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movntpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movntpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movntpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movntpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movntpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movntpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movntpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movntpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: 
[4:0.50] -; SKX-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movntpd: -; SKX: # %bb.0: -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movntpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movntpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movntpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movntpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovntpd %xmm0, (%rdi) # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movntpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movntpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movntpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fadd <2 x double> %a0, %a0 - store <2 x double> %1, <2 x double> *%a1, align 16, !nontemporal !0 - ret void -} - -define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { -; GENERIC-LABEL: test_movq_mem: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: 
[1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq_mem: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq_mem: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movq_mem: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movq_mem: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movq_mem: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movq_mem: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movq_mem: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; 
BROADWELL-LABEL: test_movq_mem: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movq_mem: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq_mem: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movq_mem: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq_mem: -; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movq_mem: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movq_mem: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movq_mem: -; 
BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movq_mem: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm0, (%rdi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movq_mem: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movq_mem: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vmovq %xmm0, (%rdi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load i64, i64* %a1, align 1 - %2 = insertelement <2 x i64> zeroinitializer, i64 %1, i32 0 - %3 = add <2 x i64> %a0, %2 - %4 = extractelement <2 x i64> %3, i32 0 - store i64 %4, i64 *%a1, align 1 - ret <2 x i64> %3 -} - -define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { -; GENERIC-LABEL: test_movq_reg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movq_reg: -; ATOM: # %bb.0: -; ATOM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movq_reg: -; SLM: # %bb.0: -; SLM-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; SLM-NEXT: paddq 
%xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movq_reg: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movq_reg: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movq_reg: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movq_reg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movq_reg: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movq_reg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; BROADWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movq_reg: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movq_reg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movq_reg: -; SKX-SSE: # %bb.0: -; 
SKX-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movq_reg: -; SKX: # %bb.0: -; SKX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] -; SKX-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movq_reg: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movq_reg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [2:0.50] -; BDVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movq_reg: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movq_reg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.50] -; BTVER2-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movq_reg: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movq_reg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.25] -; ZNVER1-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> - %2 = add <2 x i64> %a1, %1 - ret <2 x i64> %2 -} - -define void @test_movsd_mem(double* %a0, double* %a1) { -; GENERIC-LABEL: test_movsd_mem: -; GENERIC: # %bb.0: -; GENERIC-NEXT: 
movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movsd_mem: -; ATOM: # %bb.0: -; ATOM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [1:1.00] -; ATOM-NEXT: addsd %xmm0, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movsd_mem: -; SLM: # %bb.0: -; SLM-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [3:1.00] -; SLM-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movsd_mem: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsd_mem: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] -; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movsd_mem: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movsd_mem: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movsd_mem: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; 
BROADWELL-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsd_mem: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BROADWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movsd_mem: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsd_mem: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKYLAKE-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movsd_mem: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsd_mem: -; SKX: # %bb.0: -; SKX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; SKX-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movsd_mem: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; BDVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movsd_mem: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:0.50] -; 
BDVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movsd_mem: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movsd_mem: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [5:1.00] -; BTVER2-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovsd %xmm0, (%rsi) # sched: [2:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movsd_mem: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movsd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movsd_mem: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [8:0.50] -; ZNVER1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load double, double* %a0, align 1 - %2 = fadd double %1, %1 - store double %2, double *%a1, align 1 - ret void -} - -define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { -; GENERIC-LABEL: test_movsd_reg: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_movsd_reg: -; ATOM: # %bb.0: -; ATOM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; 
ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movsd_reg: -; SLM: # %bb.0: -; SLM-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movsd_reg: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movsd_reg: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movsd_reg: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movsd_reg: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movsd_reg: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movsd_reg: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movsd_reg: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movsd_reg: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movsd_reg: -; SKX-SSE: 
# %bb.0: -; SKX-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movsd_reg: -; SKX: # %bb.0: -; SKX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movsd_reg: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movsd_reg: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movsd_reg: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movsd_reg: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movsd_reg: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movsd_reg: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 2, i32 0> - ret <2 x double> %1 -} - -define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_movupd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; 
ATOM-LABEL: test_movupd: -; ATOM: # %bb.0: -; ATOM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: addpd %xmm0, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: movupd %xmm0, (%rsi) # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_movupd: -; SLM: # %bb.0: -; SLM-NEXT: movupd (%rdi), %xmm0 # sched: [3:1.00] -; SLM-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SLM-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_movupd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_movupd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_movupd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_movupd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_movupd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_movupd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] -; BROADWELL-NEXT: 
vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_movupd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_movupd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_movupd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_movupd: -; SKX: # %bb.0: -; SKX-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_movupd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_movupd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_movupd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:1.00] -; 
BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_movupd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovupd (%rdi), %xmm0 # sched: [5:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_movupd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movupd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movupd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_movupd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovupd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = load <2 x double>, <2 x double> *%a0, align 1 - %2 = fadd <2 x double> %1, %1 - store <2 x double> %2, <2 x double> *%a1, align 1 - ret void -} - -define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_mulpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulpd: -; ATOM: # %bb.0: -; ATOM-NEXT: mulpd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: mulpd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulpd: -; SLM: # %bb.0: -; SLM-NEXT: mulpd %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulpd (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; 
SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [11:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulpd: -; SKX: # %bb.0: -; SKX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulpd 
(%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [4:2.00] -; BTVER2-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [9:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BTVER2-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulpd %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulpd (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fmul <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_mulsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_mulsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_mulsd: -; ATOM: # %bb.0: -; ATOM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: mulsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_mulsd: -; SLM: # %bb.0: -; SLM-NEXT: mulsd %xmm1, %xmm0 # sched: [5:2.00] -; SLM-NEXT: mulsd (%rdi), %xmm0 # sched: [8:2.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_mulsd: -; SANDY-SSE: # 
%bb.0: -; SANDY-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_mulsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_mulsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_mulsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_mulsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] -; BROADWELL-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [8:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_mulsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BROADWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_mulsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_mulsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_mulsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_mulsd: -; SKX: # %bb.0: -; SKX-NEXT: vmulsd %xmm1, 
%xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_mulsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_mulsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_mulsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [4:2.00] -; BTVER2-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [9:2.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_mulsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [4:2.00] -; BTVER2-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_mulsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: mulsd %xmm1, %xmm0 # sched: [3:0.50] -; ZNVER1-SSE-NEXT: mulsd (%rdi), %xmm0 # sched: [10:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_mulsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; ZNVER1-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fmul double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fmul double %1, %2 - ret double %3 -} - -define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_orpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_orpd: -; ATOM: # %bb.0: -; ATOM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: orpd 
(%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_orpd: -; SLM: # %bb.0: -; SLM-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: orpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_orpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_orpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_orpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_orpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_orpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_orpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: 
retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_orpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_orpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_orpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_orpd: -; SKX: # %bb.0: -; SKX-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_orpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_orpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_orpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_orpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vorpd %xmm1, %xmm0, %xmm0 # 
sched: [1:0.50] -; BTVER2-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_orpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: orpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_orpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = or <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = or <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_packssdw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packssdw: -; ATOM: # %bb.0: -; ATOM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packssdw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packssdw: -; SLM: # %bb.0: -; SLM-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packssdw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packssdw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packssdw %xmm1, %xmm0 # 
sched: [1:0.50] -; SANDY-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packssdw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packssdw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packssdw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packssdw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packssdw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packssdw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packssdw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packssdw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packssdw: -; SKX: # %bb.0: -; 
SKX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packssdw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packssdw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packssdw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packssdw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packssdw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packssdw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packssdw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) - %2 = bitcast <8 x i16> %1 to <4 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %2, <4 x i32> %3) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone - -define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_packsswb: -; GENERIC: # %bb.0: 
-; GENERIC-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packsswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packsswb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packsswb: -; SLM: # %bb.0: -; SLM-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packsswb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packsswb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packsswb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packsswb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packsswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packsswb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packsswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpacksswb (%rdi), 
%xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packsswb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packsswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packsswb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packsswb: -; SKX: # %bb.0: -; SKX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packsswb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_packsswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packsswb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packsswb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packsswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packsswb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: 
packsswb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packsswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = bitcast <16 x i8> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %2, <8 x i16> %3) - ret <16 x i8> %4 -} -declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_packuswb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_packuswb: -; ATOM: # %bb.0: -; ATOM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: packuswb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_packuswb: -; SLM: # %bb.0: -; SLM-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: packuswb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_packuswb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_packuswb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_packuswb: -; HASWELL-SSE: # %bb.0: -; 
HASWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_packuswb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_packuswb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_packuswb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_packuswb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SKYLAKE-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_packuswb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKYLAKE-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_packuswb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:1.00] -; SKX-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_packuswb: -; SKX: # %bb.0: -; SKX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SKX-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_packuswb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: 
test_packuswb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_packuswb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_packuswb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_packuswb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: packuswb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_packuswb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) - %2 = bitcast <16 x i8> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %2, <8 x i16> %3) - ret <16 x i8> %4 -} -declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] 
-; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddb: -; SLM: # %bb.0: -; SLM-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; 
SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = add <16 x i8> %1, %2 - ret <16 x i8> %3 -} - -define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_paddd: -; GENERIC: # %bb.0: -; 
GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddd: -; ATOM: # %bb.0: -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddd: -; SLM: # %bb.0: -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; 
SKYLAKE-SSE-LABEL: test_paddd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddd: -; SKX: # %bb.0: -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddd 
%xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = add <4 x i32> %1, %2 - ret <4 x i32> %3 -} - -define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_paddq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddq: -; ATOM: # %bb.0: -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: paddq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddq: -; SLM: # %bb.0: -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq 
# sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddq: -; SKX: # %bb.0: -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddq: -; 
ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = add <2 x i64> %1, %2 - ret <2 x i64> %3 -} - -define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddsb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsb: -; SLM: # %bb.0: -; SLM-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; 
HASWELL-LABEL: test_paddsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddsb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; 
BTVER2-SSE-LABEL: test_paddsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddsw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddsw: -; SLM: # %bb.0: -; SLM-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: 
test_paddsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: 
test_paddsw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_paddusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; 
GENERIC-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusb: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddusb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusb: -; SLM: # %bb.0: -; SLM-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddusb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddusb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddusb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddusb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddusb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; 
SKYLAKE-SSE-LABEL: test_paddusb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddusb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusb: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddusb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddusb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddusb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddusb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: 
test_paddusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddusw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddusw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddusw: -; SLM: # %bb.0: -; SLM-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddusw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_paddusw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddusw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddusw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: 
test_paddusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddusw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddusw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddusw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddusw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddusw: -; SKX: # %bb.0: -; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddusw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; 
BTVER2-SSE-LABEL: test_paddusw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddusw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddusw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_paddw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_paddw: -; ATOM: # %bb.0: -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: paddw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_paddw: -; SLM: # %bb.0: -; SLM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: paddw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: 
test_paddw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_paddw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_paddw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_paddw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_paddw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_paddw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_paddw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_paddw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_paddw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_paddw: -; SKX: # %bb.0: -; 
SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_paddw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_paddw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_paddw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_paddw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_paddw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: paddw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_paddw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = add <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = add <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pand: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pand: -; ATOM: # %bb.0: -; ATOM-NEXT: pand %xmm1, %xmm0 # sched: 
[1:0.50] -; ATOM-NEXT: pand (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pand: -; SLM: # %bb.0: -; SLM-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pand (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pand: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pand: -; SANDY: # %bb.0: -; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pand: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pand: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pand: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pand: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pand: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pand: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pand: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pand: -; SKX: # %bb.0: -; SKX-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pand: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pand: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pand: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pand (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pand: -; BTVER2: # %bb.0: -; BTVER2-NEXT: 
vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pand: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pand %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pand (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pand: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = and <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = and <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pandn: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; GENERIC-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pandn: -; ATOM: # %bb.0: -; ATOM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pandn (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm0, %xmm1 # sched: [2:1.00] -; ATOM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pandn: -; SLM: # %bb.0: -; SLM-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pandn (%rdi), %xmm1 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; 
SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pandn: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SANDY-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pandn: -; SANDY: # %bb.0: -; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pandn: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pandn: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pandn: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pandn: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, 
%xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pandn: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pandn: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pandn: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pandn: -; SKX: # %bb.0: -; SKX-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pandn: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pandn: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: 
[2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pandn: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pandn: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pandn: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pandn (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pandn: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <2 x i64> %a0, <i64 -1, i64 -1> - %2 = and <2 x i64> %a1, %1 - %3 = load <2 x i64>, <2 x i64> *%a2, align 16 - %4 = xor <2 x i64> %2, <i64 -1, i64 -1> - %5 = and <2 x i64> %3, %4 - %6 = add <2 x i64> %2, %5 - ret <2 x i64> %6 -} - -define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pavgb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgb: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] 
-; ATOM-NEXT: pavgb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgb: -; SLM: # %bb.0: -; SLM-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pavgb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pavgb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pavgb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pavgb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pavgb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pavgb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pavgb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgb: 
-; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pavgb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgb: -; SKX: # %bb.0: -; SKX-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pavgb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pavgb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pavgb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pavgb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pavgb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pavgb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pavgb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <16 x i8> %a0 to <16 x i16> - %2 = zext <16 x i8> %a1 to <16 x i16> - %3 = add <16 x i16> %1, %2 - %4 = 
add <16 x i16> %3, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %5 = lshr <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %6 = trunc <16 x i16> %5 to <16 x i8> - %7 = load <16 x i8>, <16 x i8> *%a2, align 16 - %8 = zext <16 x i8> %6 to <16 x i16> - %9 = zext <16 x i8> %7 to <16 x i16> - %10 = add <16 x i16> %8, %9 - %11 = add <16 x i16> %10, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %12 = lshr <16 x i16> %11, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> - %13 = trunc <16 x i16> %12 to <16 x i8> - ret <16 x i8> %13 -} - -define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pavgw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pavgw: -; ATOM: # %bb.0: -; ATOM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pavgw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pavgw: -; SLM: # %bb.0: -; SLM-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pavgw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pavgw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pavgw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; 
SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pavgw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pavgw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pavgw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pavgw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pavgw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pavgw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pavgw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pavgw: -; SKX: # %bb.0: -; SKX-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pavgw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: 
[5:1.00] -; -; BDVER2-LABEL: test_pavgw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pavgw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pavgw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pavgw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pavgw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pavgw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = zext <8 x i16> %a0 to <8 x i32> - %2 = zext <8 x i16> %a1 to <8 x i32> - %3 = add <8 x i32> %1, %2 - %4 = add <8 x i32> %3, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> - %5 = lshr <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> - %6 = trunc <8 x i32> %5 to <8 x i16> - %7 = load <8 x i16>, <8 x i16> *%a2, align 16 - %8 = zext <8 x i16> %6 to <8 x i32> - %9 = zext <8 x i16> %7 to <8 x i32> - %10 = add <8 x i32> %8, %9 - %11 = add <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> - %12 = lshr <8 x i32> %11, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> - %13 = trunc <8 x i32> %12 to <8 x i16> - ret <8 x i16> %13 -} - -define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpeqb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqb 
(%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqb: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqb: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:0.50] -; 
BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: 
[2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = icmp eq <16 x i8> %a0, %2 - %4 = or <16 x i1> %1, %3 - %5 = sext <16 x i1> %4 to <16 x i8> - ret <16 x i8> %5 -} - -define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpeqd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqd: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: 
retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqd: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: 
test_pcmpeqd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqd %xmm1, %xmm0, 
%xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = icmp eq <4 x i32> %a0, %2 - %4 = or <4 x i1> %1, %3 - %5 = sext <4 x i1> %4 to <4 x i32> - ret <4 x i32> %5 -} - -define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpeqw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpeqw: -; ATOM: # %bb.0: -; ATOM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpeqw: -; SLM: # %bb.0: -; SLM-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpeqw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # 
sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpeqw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpeqw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpeqw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpeqw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpeqw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpeqw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpeqw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpeqw (%rdi), %xmm0, 
%xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpeqw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpeqw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpeqw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpeqw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpeqw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpeqw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpeqw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] 
-; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpeqw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp eq <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = icmp eq <8 x i16> %a0, %2 - %4 = or <8 x i1> %1, %3 - %5 = sext <8 x i1> %4 to <8 x i16> - ret <8 x i16> %5 -} - -define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pcmpgtb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtb: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtb: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: 
[7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq 
# sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtb: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpgtb (%rdi), %xmm0 # sched: 
[8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = icmp sgt <16 x i8> %a0, %2 - %4 = or <16 x i1> %1, %3 - %5 = sext <16 x i1> %4 to <16 x i8> - ret <16 x i8> %5 -} - -define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pcmpgtd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtd: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtd: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtd: -; 
SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; 
SKYLAKE-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtd: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: 
test_pcmpgtd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = icmp eq <4 x i32> %a0, %2 - %4 = or <4 x i1> %1, %3 - %5 = sext <4 x i1> %4 to <4 x i32> - ret <4 x i32> %5 -} - -define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pcmpgtw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pcmpgtw: -; ATOM: # %bb.0: -; ATOM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pcmpgtw: -; SLM: # %bb.0: -; SLM-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SLM-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pcmpgtw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SANDY-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SANDY-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: 
[7:0.50] -; SANDY-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pcmpgtw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pcmpgtw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pcmpgtw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pcmpgtw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pcmpgtw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BROADWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pcmpgtw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: 
[7:1.00] -; -; SKYLAKE-LABEL: test_pcmpgtw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKYLAKE-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pcmpgtw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; SKX-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pcmpgtw: -; SKX: # %bb.0: -; SKX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SKX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pcmpgtw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pcmpgtw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [2:0.50] -; BDVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pcmpgtw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pcmpgtw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: 
[6:1.00] -; BTVER2-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pcmpgtw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pcmpgtw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = icmp sgt <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = icmp sgt <8 x i16> %a0, %2 - %4 = or <8 x i1> %1, %3 - %5 = sext <8 x i1> %4 to <8 x i16> - ret <8 x i16> %5 -} - -define i16 @test_pextrw(<8 x i16> %a0) { -; GENERIC-LABEL: test_pextrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; GENERIC-NEXT: # kill: def $ax killed $ax killed $eax -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pextrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pextrw $6, %xmm0, %eax # sched: [4:2.00] -; ATOM-NEXT: # kill: def $ax killed $ax killed $eax -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pextrw: -; SLM: # %bb.0: -; SLM-NEXT: pextrw $6, %xmm0, %eax # sched: [1:1.00] -; SLM-NEXT: # kill: def $ax killed $ax killed $eax -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pextrw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SANDY-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pextrw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SANDY-NEXT: # kill: def $ax killed $ax killed $eax -; SANDY-NEXT: retq # sched: [1:1.00] -; -; 
HASWELL-SSE-LABEL: test_pextrw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] -; HASWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pextrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: # kill: def $ax killed $ax killed $eax -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pextrw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pextrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] -; BROADWELL-NEXT: # kill: def $ax killed $ax killed $eax -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pextrw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pextrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pextrw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKX-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pextrw: -; SKX: # %bb.0: -; SKX-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; SKX-NEXT: # kill: def $ax killed $ax killed $eax -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pextrw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pextrw: -; BDVER2: 
# %bb.0: -; BDVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pextrw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pextrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: # kill: def $ax killed $ax killed $eax -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pextrw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pextrw $6, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-SSE-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pextrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:2.00] -; ZNVER1-NEXT: # kill: def $ax killed $ax killed $eax -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = extractelement <8 x i16> %a0, i32 6 - ret i16 %1 -} - -define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { -; GENERIC-LABEL: test_pinsrw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pinsrw: -; ATOM: # %bb.0: -; ATOM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] -; ATOM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pinsrw: -; SLM: # %bb.0: -; SLM-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pinsrw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pinsrw $1, %edi, %xmm0 # 
sched: [2:1.00] -; SANDY-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pinsrw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pinsrw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; HASWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pinsrw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pinsrw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; BROADWELL-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pinsrw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; BROADWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pinsrw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; SKYLAKE-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pinsrw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKYLAKE-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pinsrw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:2.00] -; SKX-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pinsrw: -; SKX: # 
%bb.0: -; SKX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; SKX-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pinsrw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [12:0.50] -; BDVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [6:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pinsrw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [12:0.50] -; BDVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [6:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pinsrw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [7:0.50] -; BTVER2-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [4:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pinsrw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [7:0.50] -; BTVER2-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [4:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pinsrw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pinsrw $1, %edi, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pinsrw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = insertelement <8 x i16> %a0, i16 %a1, i32 1 - %2 = load i16, i16 *%a2 - %3 = insertelement <8 x i16> %1, i16 %2, i32 3 - ret <8 x i16> %3 -} - -define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmaddwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaddwd: -; 
ATOM: # %bb.0: -; ATOM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaddwd: -; SLM: # %bb.0: -; SLM-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaddwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaddwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaddwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaddwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaddwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaddwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaddwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaddwd: -; SKYLAKE: # 
%bb.0: -; SKYLAKE-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaddwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaddwd: -; SKX: # %bb.0: -; SKX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaddwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaddwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaddwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaddwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaddwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaddwd %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmaddwd (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaddwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> 
%a1) - %2 = bitcast <4 x i32> %1 to <8 x i16> - %3 = load <8 x i16>, <8 x i16> *%a2, align 16 - %4 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %2, <8 x i16> %3) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmaxsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxsw: -; SLM: # %bb.0: -; SLM-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxsw: -; 
BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxsw: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxsw: -; BTVER2: # %bb.0: -; 
BTVER2-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pmaxub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmaxub: -; ATOM: # %bb.0: -; ATOM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pmaxub (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmaxub: -; SLM: # %bb.0: -; SLM-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pmaxub (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmaxub: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmaxub: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmaxub 
%xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmaxub: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmaxub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmaxub: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmaxub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmaxub: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmaxub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmaxub: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmaxub: -; SKX: # %bb.0: -; SKX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmaxub: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmaxub 
%xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmaxub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmaxub: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmaxub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmaxub: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pmaxub (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmaxub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pminsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminsw: -; ATOM: # %bb.0: -; ATOM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pminsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] 
-; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminsw: -; SLM: # %bb.0: -; SLM-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminsw %xmm1, %xmm0, 
%xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminsw: -; SKX: # %bb.0: -; SKX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> 
@llvm.x86.sse2.pmins.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_pminub: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pminub: -; ATOM: # %bb.0: -; ATOM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pminub (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pminub: -; SLM: # %bb.0: -; SLM-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pminub (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pminub: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pminub: -; SANDY: # %bb.0: -; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pminub: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pminub: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pminub: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: pminub 
(%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pminub: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pminub: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pminub: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pminub: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pminub: -; SKX: # %bb.0: -; SKX-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pminub: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pminub: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pminub: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pminub: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: 
[6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pminub: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pminub (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pminub: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone - -define i32 @test_pmovmskb(<16 x i8> %a0) { -; GENERIC-LABEL: test_pmovmskb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmovmskb: -; ATOM: # %bb.0: -; ATOM-NEXT: pmovmskb %xmm0, %eax # sched: [3:3.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmovmskb: -; SLM: # %bb.0: -; SLM-NEXT: pmovmskb %xmm0, %eax # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmovmskb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmovmskb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmovmskb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmovmskb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: 
test_pmovmskb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmovmskb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmovmskb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmovmskb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmovmskb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmovmskb: -; SKX: # %bb.0: -; SKX-NEXT: vpmovmskb %xmm0, %eax # sched: [2:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmovmskb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [13:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmovmskb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [13:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmovmskb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmovmskb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmovmskb %xmm0, %eax # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmovmskb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmovmskb %xmm0, %eax # sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmovmskb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) - ret i32 %1 -} -declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) 
nounwind readnone - -define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhuw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhuw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhuw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulhuw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulhuw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulhuw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulhuw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulhuw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhuw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 
# sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulhuw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhuw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulhuw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhuw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulhuw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulhuw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulhuw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulhuw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulhuw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulhuw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulhuw (%rdi), %xmm0 # sched: 
[11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulhuw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmulhw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmulhw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmulhw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmulhw: -; SLM: # %bb.0: -; SLM-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmulhw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmulhw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmulhw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmulhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # 
sched: [5:1.00] -; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmulhw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmulhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmulhw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmulhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmulhw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmulhw: -; SKX: # %bb.0: -; SKX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmulhw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmulhw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmulhw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: 
[2:1.00] -; BTVER2-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmulhw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmulhw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmulhw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmulhw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmulhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_pmullw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmullw: -; ATOM: # %bb.0: -; ATOM-NEXT: pmullw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmullw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmullw: -; SLM: # %bb.0: -; SLM-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmullw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmullw: -; SANDY: # %bb.0: 
-; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmullw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmullw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmullw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmullw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmullw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmullw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmullw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmullw: -; SKX: # %bb.0: -; SKX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmullw: -; BDVER2-SSE: # 
%bb.0: -; BDVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmullw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmullw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmullw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmullw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmullw %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmullw (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmullw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = mul <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = mul <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pmuludq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pmuludq: -; ATOM: # %bb.0: -; ATOM-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: pmuludq (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pmuludq: -; SLM: # %bb.0: -; SLM-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: pmuludq 
(%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pmuludq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pmuludq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pmuludq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pmuludq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pmuludq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pmuludq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pmuludq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pmuludq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pmuludq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: 
[4:0.50] -; SKX-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pmuludq: -; SKX: # %bb.0: -; SKX-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pmuludq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; BDVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [9:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pmuludq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; BDVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pmuludq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [2:1.00] -; BTVER2-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pmuludq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BTVER2-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pmuludq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pmuludq %xmm1, %xmm0 # sched: [4:1.00] -; ZNVER1-SSE-NEXT: pmuludq (%rdi), %xmm0 # sched: [11:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pmuludq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; ZNVER1-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) - %2 = bitcast <2 x i64> %1 to <4 x i32> - %3 = load <4 x i32>, <4 x i32> *%a2, align 16 - %4 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %2, <4 x i32> %3) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone - 
-define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_por: -; GENERIC: # %bb.0: -; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_por: -; ATOM: # %bb.0: -; ATOM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: por (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_por: -; SLM: # %bb.0: -; SLM-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: por (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_por: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_por: -; SANDY: # %bb.0: -; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_por: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_por: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_por: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; 
BROADWELL-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_por: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_por: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_por: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_por: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_por: -; SKX: # %bb.0: -; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_por: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_por: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] 
-; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_por: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: por (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_por: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_por: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: por (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_por: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = or <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = or <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psadbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; GENERIC-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psadbw: -; ATOM: # %bb.0: -; ATOM-NEXT: psadbw %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: psadbw (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psadbw: -; SLM: # %bb.0: -; SLM-NEXT: psadbw %xmm1, %xmm0 # sched: [4:1.00] -; SLM-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psadbw: -; SANDY-SSE: # %bb.0: -; 
SANDY-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psadbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psadbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; HASWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [11:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psadbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psadbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [5:1.00] -; BROADWELL-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psadbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BROADWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psadbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; SKYLAKE-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psadbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKYLAKE-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psadbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; SKX-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psadbw: -; SKX: # %bb.0: -; 
SKX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SKX-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psadbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [4:0.50] -; BDVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [9:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psadbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; BDVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psadbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psadbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psadbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: psadbw (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psadbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) - %2 = bitcast <2 x i64> %1 to <16 x i8> - %3 = load <16 x i8>, <16 x i8> *%a2, align 16 - %4 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %2, <16 x i8> %3) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone - -define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { -; GENERIC-LABEL: test_pshufd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; 
GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufd: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; ATOM-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufd: -; SLM: # %bb.0: -; SLM-NEXT: pshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [4:1.00] -; SLM-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SLM-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshufd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshufd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshufd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshufd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # 
sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshufd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; BROADWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshufd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshufd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:1.00] -; SKX-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufd: -; SKX: # %bb.0: -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; SKX-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshufd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = 
mem[3,2,1,0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshufd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] -; BDVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [2:0.50] -; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshufd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshufd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [6:1.00] -; BTVER2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshufd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshufd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [8:0.50] -; ZNVER1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.25] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2> - %2 = load <4 x i32>, <4 x i32> *%a1, align 16 - %3 = shufflevector <4 x i32> %2, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pshufhw: -; 
GENERIC: # %bb.0: -; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshufhw: -; ATOM: # %bb.0: -; ATOM-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; ATOM-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshufhw: -; SLM: # %bb.0: -; SLM-NEXT: pshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [4:1.00] -; SLM-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshufhw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshufhw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshufhw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshufhw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: 
vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshufhw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshufhw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; BROADWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshufhw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshufhw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKYLAKE-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshufhw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; SKX-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshufhw: -; SKX: # %bb.0: -; SKX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; 
SKX-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshufhw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshufhw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] -; BDVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshufhw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshufhw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [6:1.00] -; BTVER2-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshufhw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshufhw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [8:0.50] -; ZNVER1-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.25] -; ZNVER1-NEXT: vpaddw 
%xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 5, i32 4> - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { -; GENERIC-LABEL: test_pshuflw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pshuflw: -; ATOM: # %bb.0: -; ATOM-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; ATOM-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] -; ATOM-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pshuflw: -; SLM: # %bb.0: -; SLM-NEXT: pshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [4:1.00] -; SLM-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SLM-NEXT: paddw %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pshuflw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; SANDY-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pshuflw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: 
[7:0.50] -; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pshuflw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pshuflw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pshuflw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pshuflw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; BROADWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BROADWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pshuflw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pshuflw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKYLAKE-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddw 
%xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pshuflw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKX-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKX-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pshuflw: -; SKX: # %bb.0: -; SKX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; SKX-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:1.00] -; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pshuflw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] -; BDVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pshuflw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] -; BDVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [2:0.50] -; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pshuflw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; BTVER2-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pshuflw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [6:1.00] -; BTVER2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; BTVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pshuflw: -; 
ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pshuflw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [8:0.50] -; ZNVER1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.25] -; ZNVER1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7> - %2 = load <8 x i16>, <8 x i16> *%a1, align 16 - %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> - %4 = add <8 x i16> %1, %3 - ret <8 x i16> %4 -} - -define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_pslld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pslld: -; ATOM: # %bb.0: -; ATOM-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: pslld (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pslld: -; SLM: # %bb.0: -; SLM-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: pslld (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pslld: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; 
-; SANDY-LABEL: test_pslld: -; SANDY: # %bb.0: -; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pslld: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pslld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pslld: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: pslld $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pslld: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pslld: -; SKX-SSE: 
# %bb.0: -; SKX-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslld: -; SKX: # %bb.0: -; SKX-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pslld: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pslld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pslld: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: pslld $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pslld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pslld: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pslld %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: pslld $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pslld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] 
-; ZNVER1-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone - -define <4 x i32> @test_pslldq(<4 x i32> %a0) { -; GENERIC-LABEL: test_pslldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pslldq: -; ATOM: # %bb.0: -; ATOM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pslldq: -; SLM: # %bb.0: -; SLM-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pslldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pslldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pslldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: 
[7:1.00] -; -; HASWELL-LABEL: test_pslldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pslldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pslldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pslldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pslldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pslldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pslldq: -; SKX: # %bb.0: -; SKX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pslldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pslldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pslldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pslldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pslldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_pslldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 4, i32 0, i32 1, i32 2> - ret <4 x i32> %1 -} - -define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psllq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllq: -; ATOM: # %bb.0: -; ATOM-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psllq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllq: -; SLM: # %bb.0: -; SLM-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psllq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psllq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psllq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsllq %xmm1, 
%xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psllq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psllq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psllq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psllq $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psllq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psllq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] -; 
SKX-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllq: -; SKX: # %bb.0: -; SKX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psllq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psllq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psllq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psllq $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psllq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psllq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psllq %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psllq $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psllq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.25] -; 
ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %1, <2 x i64> %2) - %4 = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %3, i32 2) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone -declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone - -define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psllw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psllw: -; ATOM: # %bb.0: -; ATOM-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psllw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psllw: -; SLM: # %bb.0: -; SLM-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psllw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psllw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psllw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psllw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; 
HASWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psllw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psllw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psllw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psllw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psllw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psllw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psllw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psllw: -; SKX: # %bb.0: -; SKX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq 
# sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psllw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psllw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psllw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psllw $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psllw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psllw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psllw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psllw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psllw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) 
nounwind readnone -declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone - -define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrad: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrad: -; ATOM: # %bb.0: -; ATOM-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrad (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrad: -; SLM: # %bb.0: -; SLM-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrad (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrad: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrad: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrad: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrad: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; 
BROADWELL-SSE-LABEL: test_psrad: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrad $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrad: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrad: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrad: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrad: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrad: -; SKX: # %bb.0: -; SKX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrad: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrad: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrad %xmm1, %xmm0, 
%xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrad: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrad $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrad: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrad: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrad %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrad $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrad: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone - -define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psraw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psraw $2, %xmm0 # sched: 
[1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psraw: -; ATOM: # %bb.0: -; ATOM-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psraw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psraw: -; SLM: # %bb.0: -; SLM-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psraw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psraw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psraw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psraw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psraw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psraw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psraw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psraw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 
# sched: [2:1.00] -; BROADWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psraw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psraw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psraw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psraw: -; SKX: # %bb.0: -; SKX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psraw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psraw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psraw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psraw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psraw $2, 
%xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psraw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psraw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psraw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psraw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psraw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone - -define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psrld: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrld: -; ATOM: # %bb.0: -; ATOM-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrld (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrld $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrld: -; SLM: # %bb.0: -; SLM-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: 
psrld (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrld: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrld: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrld: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrld: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrld: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrld $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrld: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrld: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrld $2, 
%xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrld: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrld: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrld: -; SKX: # %bb.0: -; SKX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrld: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrld: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrld: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrld $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrld: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrld: -; ZNVER1-SSE: # 
%bb.0: -; ZNVER1-SSE-NEXT: psrld %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrld $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrld: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %1, <4 x i32> %2) - %4 = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %3, i32 2) - ret <4 x i32> %4 -} -declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone -declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone - -define <4 x i32> @test_psrldq(<4 x i32> %a0) { -; GENERIC-LABEL: test_psrldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrldq: -; ATOM: # %bb.0: -; ATOM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrldq: -; SLM: # %bb.0: -; SLM-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrldq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: 
[1:1.00] -; -; SANDY-LABEL: test_psrldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrldq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: 
psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4> - ret <4 x i32> %1 -} - -define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psrlq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlq: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrlq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlq: -; SLM: # %bb.0: -; SLM-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrlq 
(%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrlq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psrlq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrlq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrlq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrlq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrlq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrlq $2, %xmm0 
# sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrlq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlq: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrlq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrlq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrlq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrlq $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrlq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrlq: -; ZNVER1-SSE: # %bb.0: -; 
ZNVER1-SSE-NEXT: psrlq %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrlq $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrlq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; ZNVER1-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %1, <2 x i64> %2) - %4 = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %3, i32 2) - ret <2 x i64> %4 -} -declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone -declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone - -define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psrlw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psrlw: -; ATOM: # %bb.0: -; ATOM-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psrlw (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psrlw: -; SLM: # %bb.0: -; SLM-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] -; SLM-NEXT: psrlw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psrlw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; 
SANDY-LABEL: test_psrlw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psrlw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psrlw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psrlw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; BROADWELL-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] -; BROADWELL-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psrlw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; BROADWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BROADWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psrlw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psrlw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psrlw: -; SKX-SSE: # 
%bb.0: -; SKX-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psrlw: -; SKX: # %bb.0: -; SKX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SKX-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psrlw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [3:0.50] -; BDVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:0.50] -; BDVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psrlw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [3:0.50] -; BDVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; BDVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psrlw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [7:1.00] -; BTVER2-SSE-NEXT: psrlw $2, %xmm0 # sched: [2:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psrlw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; BTVER2-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [2:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psrlw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psrlw %xmm1, %xmm0 # sched: [1:1.00] -; ZNVER1-SSE-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] -; ZNVER1-SSE-NEXT: psrlw $2, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psrlw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; ZNVER1-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; 
ZNVER1-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %1, <8 x i16> %2) - %4 = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %3, i32 2) - ret <8 x i16> %4 -} -declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone -declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone - -define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubb: -; SLM: # %bb.0: -; SLM-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: 
test_psubb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubb: -; BTVER2-SSE: # %bb.0: -; 
BTVER2-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <16 x i8> %a0, %a1 - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = sub <16 x i8> %1, %2 - ret <16 x i8> %3 -} - -define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_psubd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubd: -; ATOM: # %bb.0: -; ATOM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubd: -; SLM: # %bb.0: -; SLM-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubd: -; SANDY: # %bb.0: -; 
SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubd: -; SKX: # %bb.0: -; SKX-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubd %xmm1, 
%xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <4 x i32> %a0, %a1 - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = sub <4 x i32> %1, %2 - ret <4 x i32> %3 -} - -define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_psubq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubq: -; ATOM: # %bb.0: -; ATOM-NEXT: psubq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: psubq (%rdi), %xmm0 # sched: [3:1.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubq: -; SLM: # %bb.0: -; SLM-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubq (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; 
SANDY-SSE-LABEL: test_psubq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubq: -; 
SKX: # %bb.0: -; SKX-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubq (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = sub <2 x i64> %1, %2 - ret <2 x i64> %3 -} - -define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubsb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubsb 
(%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsb: -; SLM: # %bb.0: -; SLM-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubsb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubsb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubsb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubsb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubsb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubsb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubsb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: 
test_psubsb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubsb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubsb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubsb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubsb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubsb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubsb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubsb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubsb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - 
%2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubsw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubsw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubsw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubsw: -; SLM: # %bb.0: -; SLM-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubsw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubsw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubsw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubsw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubsw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubsw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubsw 
%xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubsw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubsw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubsw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubsw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubsw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubsw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubsw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubsw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubsw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] -; BTVER2-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubsw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubsw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubsw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_psubusb: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusb: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubusb (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusb: -; SLM: # %bb.0: -; SLM-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubusb (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubusb: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubusb: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubusb: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubusb: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubusb: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusb: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubusb: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusb: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubusb: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusb: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubusb: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # 
sched: [2:0.50] -; BDVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubusb: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubusb: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubusb: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubusb: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubusb (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubusb: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %1, <16 x i8> %2) - ret <16 x i8> %3 -} -declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone - -define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubusw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubusw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubusw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: 
[1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubusw: -; SLM: # %bb.0: -; SLM-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubusw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubusw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubusw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubusw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubusw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubusw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubusw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubusw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubusw: -; SKYLAKE: # %bb.0: -; 
SKYLAKE-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKYLAKE-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubusw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; SKX-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubusw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SKX-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubusw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubusw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubusw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubusw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubusw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubusw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubusw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) - %2 = load <8 
x i16>, <8 x i16> *%a2, align 16 - %3 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %1, <8 x i16> %2) - ret <8 x i16> %3 -} -declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone - -define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_psubw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_psubw: -; ATOM: # %bb.0: -; ATOM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: psubw (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_psubw: -; SLM: # %bb.0: -; SLM-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: psubw (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_psubw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_psubw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_psubw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_psubw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_psubw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; 
BROADWELL-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_psubw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_psubw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_psubw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_psubw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_psubw: -; SKX: # %bb.0: -; SKX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_psubw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_psubw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_psubw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_psubw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: 
[6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_psubw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: psubw (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_psubw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = sub <8 x i16> %a0, %a1 - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = sub <8 x i16> %1, %2 - ret <8 x i16> %3 -} - -define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_punpckhbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; ATOM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhbw: -; SLM: # %bb.0: -; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SLM-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhbw 
{{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; SKX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31> - ret <16 x i8> %3 -} - -define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_punpckhdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; ATOM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhdq: -; SLM: # %bb.0: -; SLM-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SLM-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [4:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhdq: -; SANDY-SSE: # 
%bb.0: -; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhdq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: vpaddd 
%xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 2, i32 6, i32 3, i32 7> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_punpckhqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = 
xmm1[1],mem[1] sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhqdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhqdq: -; SLM: # %bb.0: -; SLM-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhqdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhqdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhqdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhqdq: -; 
BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhqdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhqdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhqdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] 
-; BDVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhqdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhqdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhqdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhqdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3> - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 1, i32 3> - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - 
-define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_punpckhwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckhwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; ATOM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckhwd: -; SLM: # %bb.0: -; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SLM-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckhwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckhwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckhwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} 
xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckhwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckhwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckhwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckhwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckhwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-NEXT: 
retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckhwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckhwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckhwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckhwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckhwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckhwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-NEXT: vpunpckhwd {{.*#+}} xmm0 = 
xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckhwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckhwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> - ret <8 x i16> %3 -} - -define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { -; GENERIC-LABEL: test_punpcklbw: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklbw: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; ATOM-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklbw: -; SLM: # %bb.0: -; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SLM-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklbw: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklbw: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklbw: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklbw: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklbw: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklbw: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklbw: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklbw: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklbw: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklbw: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; SKX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklbw: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; 
BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklbw: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklbw: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklbw: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklbw: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklbw: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: 
vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> - %2 = load <16 x i8>, <16 x i8> *%a2, align 16 - %3 = shufflevector <16 x i8> %1, <16 x i8> %2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> - ret <16 x i8> %3 -} - -define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { -; GENERIC-LABEL: test_punpckldq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpckldq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpckldq: -; SLM: # %bb.0: -; SLM-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpckldq: -; SANDY-SSE: # %bb.0: -; 
SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; SANDY-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpckldq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpckldq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpckldq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpckldq: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpckldq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; 
BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpckldq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpckldq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpckldq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpckldq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpckldq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpckldq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vpaddd 
%xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpckldq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpckldq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpckldq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpckldq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.25] -; ZNVER1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> - %2 = load <4 x i32>, <4 x i32> *%a2, align 16 - %3 = shufflevector <4 x i32> %a1, <4 x i32> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> - %4 = add <4 x i32> %1, %3 - ret <4 x i32> %4 -} - -define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_punpcklqdq: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = 
xmm1[0],mem[0] sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklqdq: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklqdq: -; SLM: # %bb.0: -; SLM-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklqdq: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklqdq: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklqdq: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklqdq: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklqdq: -; 
BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklqdq: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklqdq: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklqdq: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklqdq: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklqdq: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklqdq: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] 
-; BDVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklqdq: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklqdq: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklqdq: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklqdq: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklqdq: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = shufflevector <2 x i64> %a1, <2 x i64> %2, <2x i32> <i32 0, i32 2> - %4 = add <2 x i64> %1, %3 - ret <2 x i64> %4 -} - 
-define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { -; GENERIC-LABEL: test_punpcklwd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_punpcklwd: -; ATOM: # %bb.0: -; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; ATOM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: nop # sched: [1:0.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_punpcklwd: -; SLM: # %bb.0: -; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SLM-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [4:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_punpcklwd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_punpcklwd: -; SANDY: # %bb.0: -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_punpcklwd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} 
xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_punpcklwd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_punpcklwd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_punpcklwd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; BROADWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_punpcklwd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_punpcklwd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKYLAKE-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKYLAKE-NEXT: 
retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_punpcklwd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_punpcklwd: -; SKX: # %bb.0: -; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SKX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_punpcklwd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_punpcklwd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [2:0.50] -; BDVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_punpcklwd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_punpcklwd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; BTVER2-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_punpcklwd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_punpcklwd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.25] -; ZNVER1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [8:0.50] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> - %2 = load <8 x i16>, <8 x i16> *%a2, align 16 - %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> - ret <8 x i16> %3 -} - -define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { -; GENERIC-LABEL: test_pxor: -; GENERIC: # %bb.0: -; GENERIC-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; GENERIC-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_pxor: -; ATOM: # %bb.0: -; ATOM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: pxor (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: paddq %xmm1, %xmm0 # sched: [2:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_pxor: -; SLM: # %bb.0: -; SLM-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: pxor (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_pxor: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: pxor 
%xmm1, %xmm0 # sched: [1:0.33] -; SANDY-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SANDY-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_pxor: -; SANDY: # %bb.0: -; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_pxor: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; HASWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_pxor: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_pxor: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; BROADWELL-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:0.50] -; BROADWELL-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_pxor: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; BROADWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:0.50] -; BROADWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_pxor: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_pxor: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: 
vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_pxor: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_pxor: -; SKX: # %bb.0: -; SKX-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_pxor: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_pxor: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_pxor: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_pxor: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_pxor: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: pxor (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # 
sched: [1:0.50] -; -; ZNVER1-LABEL: test_pxor: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = xor <2 x i64> %a0, %a1 - %2 = load <2 x i64>, <2 x i64> *%a2, align 16 - %3 = xor <2 x i64> %1, %2 - %4 = add <2 x i64> %3, %a1 - ret <2 x i64> %4 -} - -define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_shufpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_shufpd: -; ATOM: # %bb.0: -; ATOM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_shufpd: -; SLM: # %bb.0: -; SLM-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SLM-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_shufpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_shufpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: 
test_shufpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_shufpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_shufpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_shufpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_shufpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_shufpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_shufpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: 
shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_shufpd: -; SKX: # %bb.0: -; SKX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_shufpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_shufpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_shufpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_shufpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_shufpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_shufpd: -; ZNVER1: # 
%bb.0: -; ZNVER1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 2> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_sqrtpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] -; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtpd: -; ATOM: # %bb.0: -; ATOM-NEXT: sqrtpd %xmm0, %xmm1 # sched: [125:62.50] -; ATOM-NEXT: sqrtpd (%rdi), %xmm0 # sched: [125:62.50] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtpd: -; SLM: # %bb.0: -; SLM-NEXT: sqrtpd (%rdi), %xmm1 # sched: [74:70.00] -; SLM-NEXT: sqrtpd %xmm0, %xmm0 # sched: [71:70.00] -; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [21:21.00] -; SANDY-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:21.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:21.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtpd: -; HASWELL-SSE: # 
%bb.0: -; HASWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] -; HASWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [22:14.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [22:14.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [16:14.00] -; BROADWELL-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [21:14.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [16:14.00] -; BROADWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:14.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [24:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtpd: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtpd %xmm0, %xmm0 # 
sched: [18:6.00] -; SKX-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [24:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [9:13.50] -; BDVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [14:13.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [14:13.50] -; BDVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [27:27.00] -; BTVER2-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [32:27.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [32:27.00] -; BTVER2-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: sqrtpd %xmm0, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtpd (%rdi), %xmm0 # sched: [27:20.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [27:20.00] -; ZNVER1-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %2) - 
%4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone - -; TODO - sqrtsd_m - -define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { -; GENERIC-LABEL: test_sqrtsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] -; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_sqrtsd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd (%rdi), %xmm1 # sched: [1:1.00] -; ATOM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [62:31.00] -; ATOM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [62:31.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_sqrtsd: -; SLM: # %bb.0: -; SLM-NEXT: movapd (%rdi), %xmm1 # sched: [3:1.00] -; SLM-NEXT: sqrtsd %xmm0, %xmm0 # sched: [35:35.00] -; SLM-NEXT: sqrtsd %xmm1, %xmm1 # sched: [35:35.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_sqrtsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [21:21.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_sqrtsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:21.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:21.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_sqrtsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; 
HASWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:14.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_sqrtsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:14.00] -; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:14.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_sqrtsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [16:8.00] -; BROADWELL-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [16:8.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_sqrtsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [16:8.00] -; BROADWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] -; BROADWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [16:8.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_sqrtsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_sqrtsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] -; SKYLAKE-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SKYLAKE-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_sqrtsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: sqrtsd %xmm0, %xmm0 # 
sched: [18:6.00] -; SKX-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] -; SKX-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [18:6.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_sqrtsd: -; SKX: # %bb.0: -; SKX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [18:6.00] -; SKX-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SKX-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [18:6.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_sqrtsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [9:13.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_sqrtsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:0.50] -; BDVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [9:13.50] -; BDVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [9:13.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_sqrtsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [27:27.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_sqrtsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vmovapd (%rdi), %xmm1 # sched: [5:1.00] -; BTVER2-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [27:27.00] -; BTVER2-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [27:27.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_sqrtsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd (%rdi), %xmm1 # sched: 
[8:0.50] -; ZNVER1-SSE-NEXT: sqrtsd %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: sqrtsd %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_sqrtsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vmovapd (%rdi), %xmm1 # sched: [8:0.50] -; ZNVER1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [20:20.00] -; ZNVER1-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [20:20.00] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) - %2 = load <2 x double>, <2 x double> *%a1, align 16 - %3 = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2) - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} -declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone - -define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_subpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subpd: -; ATOM: # %bb.0: -; ATOM-NEXT: subpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: subpd (%rdi), %xmm0 # sched: [7:3.50] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subpd: -; SLM: # %bb.0: -; SLM-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subpd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: 
test_subpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subpd: -; SKX: # %bb.0: -; SKX-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_subpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subpd: -; BDVER2: # %bb.0: 
-; BDVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subpd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub <2 x double> %a0, %a1 - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = fsub <2 x double> %1, %2 - ret <2 x double> %3 -} - -define double @test_subsd(double %a0, double %a1, double *%a2) { -; GENERIC-LABEL: test_subsd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_subsd: -; ATOM: # %bb.0: -; ATOM-NEXT: subsd %xmm1, %xmm0 # sched: [5:5.00] -; ATOM-NEXT: subsd (%rdi), %xmm0 # sched: [5:5.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_subsd: -; SLM: # %bb.0: -; SLM-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: subsd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_subsd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] -; 
SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_subsd: -; SANDY: # %bb.0: -; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_subsd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_subsd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_subsd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_subsd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_subsd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_subsd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_subsd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [9:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_subsd: -; SKX: # %bb.0: -; SKX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; 
-; BDVER2-SSE-LABEL: test_subsd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_subsd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_subsd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_subsd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_subsd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: subsd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_subsd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = fsub double %a0, %a1 - %2 = load double, double *%a2, align 8 - %3 = fsub double %1, %2 - ret double %3 -} - -define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_ucomisd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %cl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] -; GENERIC-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; GENERIC-NEXT: setnp %al # sched: [1:0.50] -; GENERIC-NEXT: sete %dl # sched: [1:0.50] -; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] -; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] -; 
GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_ucomisd: -; ATOM: # %bb.0: -; ATOM-NEXT: ucomisd %xmm1, %xmm0 # sched: [9:4.50] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %cl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %cl # sched: [1:0.50] -; ATOM-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:5.00] -; ATOM-NEXT: setnp %al # sched: [1:0.50] -; ATOM-NEXT: sete %dl # sched: [1:0.50] -; ATOM-NEXT: andb %al, %dl # sched: [1:0.50] -; ATOM-NEXT: orb %cl, %dl # sched: [1:0.50] -; ATOM-NEXT: movzbl %dl, %eax # sched: [1:1.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_ucomisd: -; SLM: # %bb.0: -; SLM-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %cl # sched: [1:0.50] -; SLM-NEXT: andb %al, %cl # sched: [1:0.50] -; SLM-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] -; SLM-NEXT: setnp %al # sched: [1:0.50] -; SLM-NEXT: sete %dl # sched: [1:0.50] -; SLM-NEXT: andb %al, %dl # sched: [1:0.50] -; SLM-NEXT: orb %cl, %dl # sched: [1:0.50] -; SLM-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_ucomisd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %cl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %cl # sched: [1:0.33] -; SANDY-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-SSE-NEXT: setnp %al # sched: [1:0.50] -; SANDY-SSE-NEXT: sete %dl # sched: [1:0.50] -; SANDY-SSE-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_ucomisd: -; SANDY: # %bb.0: -; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %cl # sched: [1:0.50] -; SANDY-NEXT: andb %al, 
%cl # sched: [1:0.33] -; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setnp %al # sched: [1:0.50] -; SANDY-NEXT: sete %dl # sched: [1:0.50] -; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] -; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] -; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_ucomisd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-SSE-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_ucomisd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %cl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:0.50] -; HASWELL-NEXT: sete %dl # sched: [1:0.50] -; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_ucomisd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-SSE-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-SSE-NEXT: sete %dl 
# sched: [1:0.50] -; BROADWELL-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_ucomisd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %cl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %cl # sched: [1:0.25] -; BROADWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BROADWELL-NEXT: setnp %al # sched: [1:0.50] -; BROADWELL-NEXT: sete %dl # sched: [1:0.50] -; BROADWELL-NEXT: andb %al, %dl # sched: [1:0.25] -; BROADWELL-NEXT: orb %cl, %dl # sched: [1:0.25] -; BROADWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_ucomisd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_ucomisd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %cl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKYLAKE-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKYLAKE-NEXT: setnp %al # sched: [1:0.50] -; SKYLAKE-NEXT: sete %dl # sched: [1:0.50] -; SKYLAKE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKYLAKE-NEXT: movzbl 
%dl, %eax # sched: [1:0.25] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_ucomisd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %cl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-SSE-NEXT: setnp %al # sched: [1:0.50] -; SKX-SSE-NEXT: sete %dl # sched: [1:0.50] -; SKX-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_ucomisd: -; SKX: # %bb.0: -; SKX-NEXT: vucomisd %xmm1, %xmm0 # sched: [2:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %cl # sched: [1:0.50] -; SKX-NEXT: andb %al, %cl # sched: [1:0.25] -; SKX-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SKX-NEXT: setnp %al # sched: [1:0.50] -; SKX-NEXT: sete %dl # sched: [1:0.50] -; SKX-NEXT: andb %al, %dl # sched: [1:0.25] -; SKX-NEXT: orb %cl, %dl # sched: [1:0.25] -; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_ucomisd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_ucomisd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [1:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %cl # sched: 
[1:0.50] -; BDVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BDVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [6:1.00] -; BDVER2-NEXT: setnp %al # sched: [1:0.50] -; BDVER2-NEXT: sete %dl # sched: [1:0.50] -; BDVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_ucomisd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-SSE-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-SSE-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_ucomisd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %cl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %cl # sched: [1:0.50] -; BTVER2-NEXT: vucomisd (%rdi), %xmm0 # sched: [8:1.00] -; BTVER2-NEXT: setnp %al # sched: [1:0.50] -; BTVER2-NEXT: sete %dl # sched: [1:0.50] -; BTVER2-NEXT: andb %al, %dl # sched: [1:0.50] -; BTVER2-NEXT: orb %cl, %dl # sched: [1:0.50] -; BTVER2-NEXT: movzbl %dl, %eax # sched: [1:0.50] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_ucomisd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: ucomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-SSE-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-SSE-NEXT: sete %dl # sched: 
[1:0.25] -; ZNVER1-SSE-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_ucomisd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %cl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %cl # sched: [1:0.25] -; ZNVER1-NEXT: vucomisd (%rdi), %xmm0 # sched: [10:1.00] -; ZNVER1-NEXT: setnp %al # sched: [1:0.25] -; ZNVER1-NEXT: sete %dl # sched: [1:0.25] -; ZNVER1-NEXT: andb %al, %dl # sched: [1:0.25] -; ZNVER1-NEXT: orb %cl, %dl # sched: [1:0.25] -; ZNVER1-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) - %2 = load <2 x double>, <2 x double> *%a2, align 8 - %3 = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %2) - %4 = or i32 %1, %3 - ret i32 %4 -} -declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readnone - -define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_unpckhpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpckhpd: -; ATOM: # %bb.0: -; ATOM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; ATOM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpckhpd: -; SLM: # %bb.0: -; SLM-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SLM-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [4:1.00] -; SLM-NEXT: addpd 
%xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpckhpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpckhpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpckhpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpckhpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpckhpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpckhpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; BROADWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpckhpd: -; 
SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpckhpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKYLAKE-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpckhpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpckhpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SKX-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpckhpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpckhpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [2:0.50] -; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpckhpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: 
[6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpckhpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; BTVER2-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpckhpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpckhpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; ZNVER1-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 x double> %a1, <2 x double> %2, <2 x i32> <i32 1, i32 3> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_unpcklpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_unpcklpd: -; ATOM: # %bb.0: -; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; 
ATOM-NEXT: addpd %xmm2, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_unpcklpd: -; SLM: # %bb.0: -; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [4:1.00] -; SLM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_unpcklpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_unpcklpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_unpcklpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_unpcklpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_unpcklpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: 
unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_unpcklpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_unpcklpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_unpcklpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_unpcklpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_unpcklpd: -; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_unpcklpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: 
[7:0.50] -; BDVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [2:0.50] -; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: test_unpcklpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [2:0.50] -; BDVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_unpcklpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_unpcklpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_unpcklpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_unpcklpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> - %2 = load <2 x double>, <2 x double> *%a2, align 16 - %3 = shufflevector <2 
x double> %1, <2 x double> %2, <2 x i32> <i32 0, i32 2> - %4 = fadd <2 x double> %1, %3 - ret <2 x double> %4 -} - -define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { -; GENERIC-LABEL: test_xorpd: -; GENERIC: # %bb.0: -; GENERIC-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: retq # sched: [1:1.00] -; -; ATOM-LABEL: test_xorpd: -; ATOM: # %bb.0: -; ATOM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; ATOM-NEXT: xorpd (%rdi), %xmm0 # sched: [1:1.00] -; ATOM-NEXT: addpd %xmm1, %xmm0 # sched: [6:3.00] -; ATOM-NEXT: retq # sched: [79:39.50] -; -; SLM-LABEL: test_xorpd: -; SLM: # %bb.0: -; SLM-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; SLM-NEXT: xorpd (%rdi), %xmm0 # sched: [4:1.00] -; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SLM-NEXT: retq # sched: [4:1.00] -; -; SANDY-SSE-LABEL: test_xorpd: -; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: retq # sched: [1:1.00] -; -; SANDY-LABEL: test_xorpd: -; SANDY: # %bb.0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] -; -; HASWELL-SSE-LABEL: test_xorpd: -; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; HASWELL-LABEL: test_xorpd: -; HASWELL: # %bb.0: -; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; 
HASWELL-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-SSE-LABEL: test_xorpd: -; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] -; -; BROADWELL-LABEL: test_xorpd: -; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; BROADWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BROADWELL-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-SSE-LABEL: test_xorpd: -; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] -; -; SKYLAKE-LABEL: test_xorpd: -; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKYLAKE-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKYLAKE-NEXT: retq # sched: [7:1.00] -; -; SKX-SSE-LABEL: test_xorpd: -; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.33] -; SKX-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50] -; SKX-SSE-NEXT: retq # sched: [7:1.00] -; -; SKX-LABEL: test_xorpd: -; SKX: # %bb.0: -; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SKX-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] -; SKX-NEXT: retq # sched: [7:1.00] -; -; BDVER2-SSE-LABEL: test_xorpd: -; BDVER2-SSE: # %bb.0: -; BDVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [2:0.50] -; BDVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [7:0.50] -; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-SSE-NEXT: retq # sched: [5:1.00] -; -; BDVER2-LABEL: 
test_xorpd: -; BDVER2: # %bb.0: -; BDVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [2:0.50] -; BDVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; BDVER2-NEXT: retq # sched: [5:1.00] -; -; BTVER2-SSE-LABEL: test_xorpd: -; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.50] -; BTVER2-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-SSE-NEXT: retq # sched: [4:1.00] -; -; BTVER2-LABEL: test_xorpd: -; BTVER2: # %bb.0: -; BTVER2-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; BTVER2-NEXT: retq # sched: [4:1.00] -; -; ZNVER1-SSE-LABEL: test_xorpd: -; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: xorpd %xmm1, %xmm0 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: xorpd (%rdi), %xmm0 # sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] -; -; ZNVER1-LABEL: test_xorpd: -; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.25] -; ZNVER1-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; ZNVER1-NEXT: retq # sched: [1:0.50] - %1 = bitcast <2 x double> %a0 to <4 x i32> - %2 = bitcast <2 x double> %a1 to <4 x i32> - %3 = xor <4 x i32> %1, %2 - %4 = load <2 x double>, <2 x double> *%a2, align 16 - %5 = bitcast <2 x double> %4 to <4 x i32> - %6 = xor <4 x i32> %3, %5 - %7 = bitcast <4 x i32> %6 to <2 x double> - %8 = fadd <2 x double> %a1, %7 - ret <2 x double> %8 -} - -!0 = !{i32 1} |