diff options
| author | Wang, Pengfei <pengfei.wang@intel.com> | 2019-12-24 09:44:22 +0800 |
|---|---|---|
| committer | Wang, Pengfei <pengfei.wang@intel.com> | 2019-12-26 08:15:13 +0800 |
| commit | 472bded3eda44eff84b259b2717e322dbdb7381e (patch) | |
| tree | f1d011d804ba8b4ef93071c0a510b1a000b91b4d /llvm/test/CodeGen/X86 | |
| parent | b082a2952f64b085127e0a0aad0b742c63e5075e (diff) | |
| download | bcm5719-llvm-472bded3eda44eff84b259b2717e322dbdb7381e.tar.gz bcm5719-llvm-472bded3eda44eff84b259b2717e322dbdb7381e.zip | |
[X86] Enable STRICT_SINT_TO_FP/STRICT_UINT_TO_FP on X86 backend
Summary: Enable STRICT_SINT_TO_FP/STRICT_UINT_TO_FP on X86 backend
Reviewers: craig.topper, RKSimon, LiuChen3, uweigand, andrew.w.kaylor
Subscribers: hiraditya, llvm-commits, LuoYuanke
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71871
Diffstat (limited to 'llvm/test/CodeGen/X86')
| -rw-r--r-- | llvm/test/CodeGen/X86/fp-intrinsics.ll | 40 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll | 1305 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 270 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll | 646 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll | 421 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll | 390 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll | 342 |
7 files changed, 3210 insertions, 204 deletions
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index 2135cdb0404..7883b9ba468 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -2201,27 +2201,17 @@ entry: define double @sifdl(i64 %x) #0 { ; X87-LABEL: sifdl: ; X87: # %bb.0: # %entry -; X87-NEXT: subl $12, %esp -; X87-NEXT: .cfi_def_cfa_offset 16 -; X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: fildll (%esp) -; X87-NEXT: addl $12, %esp -; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: fildll {{[0-9]+}}(%esp) ; X87-NEXT: retl ; ; X86-SSE-LABEL: sifdl: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: subl $20, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 24 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: subl $12, %esp +; X86-SSE-NEXT: .cfi_def_cfa_offset 16 ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fstpl (%esp) ; X86-SSE-NEXT: fldl (%esp) -; X86-SSE-NEXT: addl $20, %esp +; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl ; @@ -2244,27 +2234,17 @@ entry: define float @siffl(i64 %x) #0 { ; X87-LABEL: siffl: ; X87: # %bb.0: # %entry -; X87-NEXT: subl $12, %esp -; X87-NEXT: .cfi_def_cfa_offset 16 -; X87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; X87-NEXT: movl %eax, (%esp) -; X87-NEXT: fildll (%esp) -; X87-NEXT: addl $12, %esp -; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: fildll {{[0-9]+}}(%esp) ; X87-NEXT: retl ; ; X86-SSE-LABEL: siffl: ; X86-SSE: # %bb.0: # %entry -; X86-SSE-NEXT: subl $20, %esp -; X86-SSE-NEXT: .cfi_def_cfa_offset 24 -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; X86-SSE-NEXT: pushl %eax +; X86-SSE-NEXT: .cfi_def_cfa_offset 8 ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) -; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) -; X86-SSE-NEXT: flds {{[0-9]+}}(%esp) -; X86-SSE-NEXT: addl $20, %esp +; X86-SSE-NEXT: fstps (%esp) +; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll new file mode 100644 index 00000000000..3e7e78a3da3 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -0,0 +1,1305 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX1-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX1-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=X87 + +declare float @llvm.experimental.constrained.sitofp.f32.i1(i1, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i8(i8, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i16(i16, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i1(i1, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i8(i8, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i16(i16, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata) +declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata) + +declare double @llvm.experimental.constrained.sitofp.f64.i1(i1, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i8(i8, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i16(i16, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i1(i1, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i8(i8, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i16(i16, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata) + +define float @sitofp_i1tof32(i1 %x) #0 { +; SSE-X86-LABEL: sitofp_i1tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al +; SSE-X86-NEXT: andb $1, %al +; SSE-X86-NEXT: negb %al +; SSE-X86-NEXT: movsbl %al, %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i1tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: andb $1, %dil +; SSE-X64-NEXT: negb %dil +; SSE-X64-NEXT: movsbl %dil, %eax +; SSE-X64-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i1tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al +; AVX-X86-NEXT: andb $1, %al +; AVX-X86-NEXT: negb %al +; AVX-X86-NEXT: movsbl %al, %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i1tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: andb $1, %dil +; AVX-X64-NEXT: negb %dil +; AVX-X64-NEXT: movsbl %dil, %eax +; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i1tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movb {{[0-9]+}}(%esp), %al +; X87-NEXT: andb $1, %al +; X87-NEXT: negb %al +; X87-NEXT: movsbl %al, %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.sitofp.f32.i1(i1 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @sitofp_i8tof32(i8 %x) #0 { +; SSE-X86-LABEL: sitofp_i8tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i8tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movsbl %dil, %eax +; SSE-X64-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i8tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i8tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movsbl %dil, %eax +; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i8tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.sitofp.f32.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @sitofp_i16tof32(i16 %x) #0 { +; SSE-X86-LABEL: sitofp_i16tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i16tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movswl %di, %eax +; SSE-X64-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i16tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i16tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movswl %di, %eax +; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i16tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.sitofp.f32.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @sitofp_i32tof32(i32 %x) #0 { +; SSE-X86-LABEL: sitofp_i32tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i32tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: cvtsi2ss %edi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i32tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i32tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i32tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: fildl (%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @sitofp_i64tof32(i64 %x) #0 { +; SSE-X86-LABEL: sitofp_i64tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fstps (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i64tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: cvtsi2ss %rdi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i64tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-X86-NEXT: fstps (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i64tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i64tof32: +; X87: # %bb.0: +; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @uitofp_i1tof32(i1 %x) #0 { +; SSE-X86-LABEL: uitofp_i1tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movb {{[0-9]+}}(%esp), %al +; SSE-X86-NEXT: andb $1, %al +; SSE-X86-NEXT: movzbl %al, %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i1tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: andl $1, %edi +; SSE-X64-NEXT: cvtsi2ss %edi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i1tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movb {{[0-9]+}}(%esp), %al +; AVX-X86-NEXT: andb $1, %al +; AVX-X86-NEXT: movzbl %al, %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i1tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: andl $1, %edi +; AVX-X64-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i1tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movb {{[0-9]+}}(%esp), %al +; X87-NEXT: andb $1, %al +; X87-NEXT: movzbl %al, %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.uitofp.f32.i1(i1 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @uitofp_i8tof32(i8 %x) #0 { +; SSE-X86-LABEL: uitofp_i8tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i8tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movzbl %dil, %eax +; SSE-X64-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i8tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i8tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movzbl %dil, %eax +; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i8tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.uitofp.f32.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @uitofp_i16tof32(i16 %x) #0 { +; SSE-X86-LABEL: uitofp_i16tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i16tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movzwl %di, %eax +; SSE-X64-NEXT: cvtsi2ss %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i16tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i16tof32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movzwl %di, %eax +; AVX-X64-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i16tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: fildl (%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.uitofp.f32.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @uitofp_i32tof32(i32 %x) #0 { +; SSE-X86-LABEL: uitofp_i32tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE-X86-NEXT: orpd %xmm0, %xmm1 +; SSE-X86-NEXT: subsd %xmm0, %xmm1 +; SSE-X86-NEXT: xorps %xmm0, %xmm0 +; SSE-X86-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: popl %eax +; SSE-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i32tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movl %edi, %eax +; SSE-X64-NEXT: cvtsi2ss %rax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX1-X86-LABEL: uitofp_i32tof32: +; AVX1-X86: # %bb.0: +; AVX1-X86-NEXT: pushl %eax +; AVX1-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1 +; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 +; AVX1-X86-NEXT: vmovss %xmm0, (%esp) +; AVX1-X86-NEXT: flds (%esp) +; AVX1-X86-NEXT: popl %eax +; AVX1-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX1-X86-NEXT: retl +; +; AVX1-X64-LABEL: uitofp_i32tof32: +; AVX1-X64: # %bb.0: +; AVX1-X64-NEXT: movl %edi, %eax +; AVX1-X64-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0 +; AVX1-X64-NEXT: retq +; +; AVX512-X86-LABEL: uitofp_i32tof32: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: pushl %eax +; AVX512-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX512-X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 +; AVX512-X86-NEXT: vmovss %xmm0, (%esp) +; AVX512-X86-NEXT: flds (%esp) +; AVX512-X86-NEXT: popl %eax +; AVX512-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX512-X86-NEXT: retl +; +; AVX512-X64-LABEL: uitofp_i32tof32: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 +; AVX512-X64-NEXT: retq +; +; X87-LABEL: uitofp_i32tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: .cfi_offset %ebp, -8 +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: .cfi_def_cfa_register %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $8, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X87-NEXT: fildll (%esp) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: .cfi_def_cfa %esp, 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define float @uitofp_i64tof32(i64 %x) #0 { +; SSE-X86-LABEL: uitofp_i64tof32: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $16, %esp +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) +; SSE-X86-NEXT: xorl %eax, %eax +; SSE-X86-NEXT: cmpl $0, 12(%ebp) +; SSE-X86-NEXT: setns %al +; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) +; SSE-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE-X86-NEXT: movss %xmm0, (%esp) +; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i64tof32: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movq %rdi, %rax +; SSE-X64-NEXT: shrq %rax +; SSE-X64-NEXT: movl %edi, %ecx +; SSE-X64-NEXT: andl $1, %ecx +; SSE-X64-NEXT: orq %rax, %rcx +; SSE-X64-NEXT: testq %rdi, %rdi +; SSE-X64-NEXT: cmovnsq %rdi, %rcx +; SSE-X64-NEXT: cvtsi2ss %rcx, %xmm0 +; SSE-X64-NEXT: jns .LBB9_2 +; SSE-X64-NEXT: # %bb.1: +; SSE-X64-NEXT: addss %xmm0, %xmm0 +; SSE-X64-NEXT: .LBB9_2: +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i64tof32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $16, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-X86-NEXT: xorl %eax, %eax +; AVX-X86-NEXT: cmpl $0, 12(%ebp) +; AVX-X86-NEXT: setns %al +; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; AVX-X86-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX1-X64-LABEL: uitofp_i64tof32: +; AVX1-X64: # %bb.0: +; AVX1-X64-NEXT: movq %rdi, %rax +; AVX1-X64-NEXT: shrq %rax +; AVX1-X64-NEXT: movl %edi, %ecx +; AVX1-X64-NEXT: andl $1, %ecx +; AVX1-X64-NEXT: orq %rax, %rcx +; AVX1-X64-NEXT: testq %rdi, %rdi +; AVX1-X64-NEXT: cmovnsq %rdi, %rcx +; AVX1-X64-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0 +; AVX1-X64-NEXT: jns .LBB9_2 +; AVX1-X64-NEXT: # %bb.1: +; AVX1-X64-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX1-X64-NEXT: .LBB9_2: +; AVX1-X64-NEXT: retq +; +; AVX512-X64-LABEL: uitofp_i64tof32: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: vcvtusi2ss %rdi, %xmm0, %xmm0 +; AVX512-X64-NEXT: retq +; +; X87-LABEL: uitofp_i64tof32: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: .cfi_offset %ebp, -8 +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: .cfi_def_cfa_register %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $16, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl 12(%ebp), %ecx +; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X87-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X87-NEXT: xorl %eax, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: setns %al +; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; X87-NEXT: fstps {{[0-9]+}}(%esp) +; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: .cfi_def_cfa %esp, 4 +; X87-NEXT: retl + %result = call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret float %result +} + +define double @sitofp_i8tof64(i8 %x) #0 { +; SSE-X86-LABEL: sitofp_i8tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movsbl 8(%ebp), %eax +; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i8tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movsbl %dil, %eax +; SSE-X64-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i8tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: movsbl 8(%ebp), %eax +; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i8tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movsbl %dil, %eax +; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i8tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.sitofp.f64.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @sitofp_i16tof64(i16 %x) #0 { +; SSE-X86-LABEL: sitofp_i16tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movswl 8(%ebp), %eax +; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i16tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movswl %di, %eax +; SSE-X64-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i16tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: movswl 8(%ebp), %eax +; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i16tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movswl %di, %eax +; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i16tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.sitofp.f64.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @sitofp_i32tof64(i32 %x) #0 { +; SSE-X86-LABEL: sitofp_i32tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i32tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: cvtsi2sd %edi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i32tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i32tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i32tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: fildl (%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @sitofp_i64tof64(i64 %x) #0 { +; SSE-X86-LABEL: sitofp_i64tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: fildll 8(%ebp) +; SSE-X86-NEXT: fstpl (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: sitofp_i64tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: cvtsi2sd %rdi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: sitofp_i64tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: fildll 8(%ebp) +; AVX-X86-NEXT: fstpl (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: sitofp_i64tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: sitofp_i64tof64: +; X87: # %bb.0: +; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @uitofp_i1tof64(i1 %x) #0 { +; SSE-X86-LABEL: uitofp_i1tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movb 8(%ebp), %al +; SSE-X86-NEXT: andb $1, %al +; SSE-X86-NEXT: movzbl %al, %eax +; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i1tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: andl $1, %edi +; SSE-X64-NEXT: cvtsi2sd %edi, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i1tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: movb 8(%ebp), %al +; AVX-X86-NEXT: andb $1, %al +; AVX-X86-NEXT: movzbl %al, %eax +; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i1tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: andl $1, %edi +; AVX-X64-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i1tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movb {{[0-9]+}}(%esp), %al +; X87-NEXT: andb $1, %al +; X87-NEXT: movzbl %al, %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.uitofp.f64.i1(i1 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @uitofp_i8tof64(i8 %x) #0 { +; SSE-X86-LABEL: uitofp_i8tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movzbl 8(%ebp), %eax +; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i8tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movzbl %dil, %eax +; SSE-X64-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i8tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: movzbl 8(%ebp), %eax +; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i8tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movzbl %dil, %eax +; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i8tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.uitofp.f64.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @uitofp_i16tof64(i16 %x) #0 { +; SSE-X86-LABEL: uitofp_i16tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movzwl 8(%ebp), %eax +; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X86-NEXT: movsd %xmm0, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i16tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movzwl %di, %eax +; SSE-X64-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i16tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: movzwl 8(%ebp), %eax +; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: uitofp_i16tof64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: movzwl %di, %eax +; AVX-X64-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 +; AVX-X64-NEXT: retq +; +; X87-LABEL: uitofp_i16tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %eax +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: fildl (%esp) +; X87-NEXT: popl %eax +; X87-NEXT: .cfi_def_cfa_offset 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.uitofp.f64.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @uitofp_i32tof64(i32 %x) #0 { +; SSE-X86-LABEL: uitofp_i32tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE-X86-NEXT: orpd %xmm0, %xmm1 +; SSE-X86-NEXT: subsd %xmm0, %xmm1 +; SSE-X86-NEXT: movsd %xmm1, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i32tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movl %edi, %eax +; SSE-X64-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX1-X86-LABEL: uitofp_i32tof64: +; AVX1-X86: # %bb.0: +; AVX1-X86-NEXT: pushl %ebp +; AVX1-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX1-X86-NEXT: .cfi_offset %ebp, -8 +; AVX1-X86-NEXT: movl %esp, %ebp +; AVX1-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX1-X86-NEXT: andl $-8, %esp +; AVX1-X86-NEXT: subl $8, %esp +; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; AVX1-X86-NEXT: vorpd %xmm0, %xmm1, %xmm1 +; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0 +; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX1-X86-NEXT: fldl (%esp) +; AVX1-X86-NEXT: movl %ebp, %esp +; AVX1-X86-NEXT: popl %ebp +; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX1-X86-NEXT: retl +; +; AVX1-X64-LABEL: uitofp_i32tof64: +; AVX1-X64: # %bb.0: +; AVX1-X64-NEXT: movl %edi, %eax +; AVX1-X64-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0 +; AVX1-X64-NEXT: retq +; +; AVX512-X86-LABEL: uitofp_i32tof64: +; AVX512-X86: # %bb.0: +; AVX512-X86-NEXT: pushl %ebp +; AVX512-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX512-X86-NEXT: .cfi_offset %ebp, -8 +; AVX512-X86-NEXT: movl %esp, %ebp +; AVX512-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX512-X86-NEXT: andl $-8, %esp +; AVX512-X86-NEXT: subl $8, %esp +; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0 +; AVX512-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX512-X86-NEXT: fldl (%esp) +; AVX512-X86-NEXT: movl %ebp, %esp +; AVX512-X86-NEXT: popl %ebp +; AVX512-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX512-X86-NEXT: retl +; +; AVX512-X64-LABEL: uitofp_i32tof64: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 +; AVX512-X64-NEXT: retq +; +; X87-LABEL: uitofp_i32tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: .cfi_offset %ebp, -8 +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: .cfi_def_cfa_register %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $8, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: movl $0, {{[0-9]+}}(%esp) +; X87-NEXT: fildll (%esp) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: .cfi_def_cfa %esp, 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +define double @uitofp_i64tof64(i64 %x) #0 { +; SSE-X86-LABEL: uitofp_i64tof64: +; SSE-X86: # %bb.0: +; SSE-X86-NEXT: pushl %ebp +; SSE-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE-X86-NEXT: .cfi_offset %ebp, -8 +; SSE-X86-NEXT: movl %esp, %ebp +; SSE-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE-X86-NEXT: andl $-8, %esp +; SSE-X86-NEXT: subl $8, %esp +; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; SSE-X86-NEXT: subpd {{\.LCPI.*}}, %xmm0 +; SSE-X86-NEXT: movapd %xmm0, %xmm1 +; SSE-X86-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-X86-NEXT: addpd %xmm0, %xmm1 +; SSE-X86-NEXT: movlpd %xmm1, (%esp) +; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: movl %ebp, %esp +; SSE-X86-NEXT: popl %ebp +; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE-X86-NEXT: retl +; +; SSE-X64-LABEL: uitofp_i64tof64: +; SSE-X64: # %bb.0: +; SSE-X64-NEXT: movq %rdi, %xmm1 +; SSE-X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] +; SSE-X64-NEXT: subpd {{.*}}(%rip), %xmm1 +; SSE-X64-NEXT: movapd %xmm1, %xmm0 +; SSE-X64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] +; SSE-X64-NEXT: addpd %xmm1, %xmm0 +; SSE-X64-NEXT: retq +; +; AVX-X86-LABEL: uitofp_i64tof64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX-X86-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX-X86-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX-X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX-X86-NEXT: vmovlpd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX1-X64-LABEL: uitofp_i64tof64: +; AVX1-X64: # %bb.0: +; AVX1-X64-NEXT: vmovq %rdi, %xmm0 +; AVX1-X64-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; AVX1-X64-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-X64-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] +; AVX1-X64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX1-X64-NEXT: retq +; +; AVX512-X64-LABEL: uitofp_i64tof64: +; AVX512-X64: # %bb.0: +; AVX512-X64-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 +; AVX512-X64-NEXT: retq +; +; X87-LABEL: uitofp_i64tof64: +; X87: # %bb.0: +; X87-NEXT: pushl %ebp +; X87-NEXT: .cfi_def_cfa_offset 8 +; X87-NEXT: .cfi_offset %ebp, -8 +; X87-NEXT: movl %esp, %ebp +; X87-NEXT: .cfi_def_cfa_register %ebp +; X87-NEXT: andl $-8, %esp +; X87-NEXT: subl $16, %esp +; X87-NEXT: movl 8(%ebp), %eax +; X87-NEXT: movl 12(%ebp), %ecx +; X87-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X87-NEXT: movl %eax, (%esp) +; X87-NEXT: xorl %eax, %eax +; X87-NEXT: testl %ecx, %ecx +; X87-NEXT: setns %al +; X87-NEXT: fildll (%esp) +; X87-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: movl %ebp, %esp +; X87-NEXT: popl %ebp +; X87-NEXT: .cfi_def_cfa %esp, 4 +; X87-NEXT: retl + %result = call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %result +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index 0df9f33fb07..a718a9ea466 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -21,6 +21,16 @@ declare i8 @llvm.experimental.constrained.fptoui.i8.x86_fp80(x86_fp80, metadata declare i16 @llvm.experimental.constrained.fptoui.i16.x86_fp80(x86_fp80, metadata) declare i32 @llvm.experimental.constrained.fptoui.i32.x86_fp80(x86_fp80, metadata) declare i64 @llvm.experimental.constrained.fptoui.i64.x86_fp80(x86_fp80, metadata) +declare x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i1(i1, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i8(i8, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i16(i16, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i32(i32, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i64(i64, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i1(i1, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i8(i8, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i16(i16, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i32(i32, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i64(i64, metadata, metadata) define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-LABEL: fadd_fp80: @@ -601,4 +611,264 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 { ret i64 %result } +define x86_fp80 @sint1_to_fp80(i1 %x) #0 { +; X86-LABEL: sint1_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $1, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %eax +; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: sint1_to_fp80: +; X64: # %bb.0: +; X64-NEXT: andb $1, %dil +; X64-NEXT: negb %dil +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i1(i1 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @sint8_to_fp80(i8 %x) #0 { +; X86-LABEL: sint8_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: sint8_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movsbl %dil, %eax +; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @sint16_to_fp80(i16 %x) #0 { +; X86-LABEL: sint16_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: sint16_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp) +; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @sint32_to_fp80(i32 %x) #0 { +; X86-LABEL: sint32_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: fildl (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: sint32_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) +; X64-NEXT: fildl -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @sint64_to_fp80(i64 %x) #0 { +; X86-LABEL: sint64_to_fp80: +; X86: # %bb.0: +; X86-NEXT: fildll {{[0-9]+}}(%esp) +; X86-NEXT: retl +; +; X64-LABEL: sint64_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: fildll -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @uint1_to_fp80(i1 %x) #0 { +; X86-LABEL: uint1_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: andb $1, %al +; X86-NEXT: movzbl %al, %eax +; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: uint1_to_fp80: +; X64: # %bb.0: +; X64-NEXT: andl $1, %edi +; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp) +; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i1(i1 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @uint8_to_fp80(i8 %x) #0 { +; X86-LABEL: uint8_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) +; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: uint8_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) +; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i8(i8 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @uint16_to_fp80(i16 %x) #0 { +; X86-LABEL: uint16_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %eax +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: fildl (%esp) +; X86-NEXT: popl %eax +; X86-NEXT: .cfi_def_cfa_offset 4 +; X86-NEXT: retl +; +; X64-LABEL: uint16_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) +; X64-NEXT: fildl -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i16(i16 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @uint32_to_fp80(i32 %x) #0 { +; X86-LABEL: uint32_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: fildll (%esp) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: uint32_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X64-NEXT: fildll -{{[0-9]+}}(%rsp) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i32(i32 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + +define x86_fp80 @uint64_to_fp80(i64 %x) #0 { +; X86-LABEL: uint64_to_fp80: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: .cfi_def_cfa_offset 8 +; X86-NEXT: .cfi_offset %ebp, -8 +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: .cfi_def_cfa_register %ebp +; X86-NEXT: andl $-8, %esp +; X86-NEXT: subl $8, %esp +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %eax, (%esp) +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: testl %ecx, %ecx +; X86-NEXT: setns %al +; X86-NEXT: fildll (%esp) +; X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4) +; X86-NEXT: movl %ebp, %esp +; X86-NEXT: popl %ebp +; X86-NEXT: .cfi_def_cfa %esp, 4 +; X86-NEXT: retl +; +; X64-LABEL: uint64_to_fp80: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: testq %rdi, %rdi +; X64-NEXT: setns %al +; X64-NEXT: fildll -{{[0-9]+}}(%rsp) +; X64-NEXT: fadds {{\.LCPI.*}}(,%rax,4) +; X64-NEXT: retq + %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i64(i64 %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret x86_fp80 %result +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll new file mode 100644 index 00000000000..19c94e22da7 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll @@ -0,0 +1,646 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-32,AVX1-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX1,AVX-64,AVX1-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-32,AVX512VL-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512VL,AVX-64,AVX512VL-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX,AVX512DQ,AVX512DQ-64 + +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) + +define <4 x float> @sitofp_v4i1_v4f32(<4 x i1> %x) #0 { +; SSE-LABEL: sitofp_v4i1_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v4i1_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i1(<4 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @uitofp_v4i1_v4f32(<4 x i1> %x) #0 { +; SSE-32-LABEL: uitofp_v4i1_v4f32: +; SSE-32: # %bb.0: +; SSE-32-NEXT: andps {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: uitofp_v4i1_v4f32: +; SSE-64: # %bb.0: +; SSE-64-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-64-NEXT: retq +; +; AVX1-32-LABEL: uitofp_v4i1_v4f32: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v4i1_v4f32: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-32-LABEL: uitofp_v4i1_v4f32: +; AVX512VL-32: # %bb.0: +; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512VL-32-NEXT: retl +; +; AVX512VL-64-LABEL: uitofp_v4i1_v4f32: +; AVX512VL-64: # %bb.0: +; AVX512VL-64-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-64-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512VL-64-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_v4i1_v4f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i1(<4 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @sitofp_v4i8_v4f32(<4 x i8> %x) #0 { +; SSE-LABEL: sitofp_v4i8_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: psrad $24, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v4i8_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i8(<4 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @uitofp_v4i8_v4f32(<4 x i8> %x) #0 { +; SSE-LABEL: uitofp_v4i8_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: uitofp_v4i8_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i8(<4 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @sitofp_v4i16_v4f32(<4 x i16> %x) #0 { +; SSE-LABEL: sitofp_v4i16_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: psrad $16, %xmm0 +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v4i16_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i16(<4 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @uitofp_v4i16_v4f32(<4 x i16> %x) #0 { +; SSE-LABEL: uitofp_v4i16_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: uitofp_v4i16_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i16(<4 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @sitofp_v4i32_v4f32(<4 x i32> %x) #0 { +; SSE-LABEL: sitofp_v4i32_v4f32: +; SSE: # %bb.0: +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v4i32_v4f32: +; AVX: # %bb.0: +; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <4 x float> @uitofp_v4i32_v4f32(<4 x i32> %x) #0 { +; SSE-32-LABEL: uitofp_v4i32_v4f32: +; SSE-32: # %bb.0: +; SSE-32-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; SSE-32-NEXT: pand %xmm0, %xmm1 +; SSE-32-NEXT: por {{\.LCPI.*}}, %xmm1 +; SSE-32-NEXT: psrld $16, %xmm0 +; SSE-32-NEXT: por {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: addps {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: addps %xmm1, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: uitofp_v4i32_v4f32: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; SSE-64-NEXT: pand %xmm0, %xmm1 +; SSE-64-NEXT: por {{.*}}(%rip), %xmm1 +; SSE-64-NEXT: psrld $16, %xmm0 +; SSE-64-NEXT: por {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: addps {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: addps %xmm1, %xmm0 +; SSE-64-NEXT: retq +; +; AVX1-32-LABEL: uitofp_v4i32_v4f32: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-32-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-32-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v4i32_v4f32: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-64-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-64-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-LABEL: uitofp_v4i32_v4f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0 +; AVX512VL-NEXT: ret{{[l|q]}} +; +; AVX512DQ-LABEL: uitofp_v4i32_v4f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x float> %result +} + +define <2 x double> @sitofp_v2i1_v2f64(<2 x i1> %x) #0 { +; SSE-LABEL: sitofp_v2i1_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-NEXT: pslld $31, %xmm0 +; SSE-NEXT: psrad $31, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v2i1_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX-NEXT: vpslld $31, %xmm0, %xmm0 +; AVX-NEXT: vpsrad $31, %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i1(<2 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @uitofp_v2i1_v2f64(<2 x i1> %x) #0 { +; SSE-32-LABEL: uitofp_v2i1_v2f64: +; SSE-32: # %bb.0: +; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-32-NEXT: pand {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: uitofp_v2i1_v2f64: +; SSE-64: # %bb.0: +; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SSE-64-NEXT: pand {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-64-NEXT: retq +; +; AVX1-32-LABEL: uitofp_v2i1_v2f64: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-32-NEXT: vandps {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v2i1_v2f64: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX1-64-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-32-LABEL: uitofp_v2i1_v2f64: +; AVX512VL-32: # %bb.0: +; AVX512VL-32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512VL-32-NEXT: retl +; +; AVX512VL-64-LABEL: uitofp_v2i1_v2f64: +; AVX512VL-64: # %bb.0: +; AVX512VL-64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-64-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-64-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512VL-64-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_v2i1_v2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i1(<2 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @sitofp_v2i8_v2f64(<2 x i8> %x) #0 { +; SSE-LABEL: sitofp_v2i8_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: psrad $24, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v2i8_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i8(<2 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @uitofp_v2i8_v2f64(<2 x i8> %x) #0 { +; SSE-LABEL: uitofp_v2i8_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: uitofp_v2i8_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i8(<2 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @sitofp_v2i16_v2f64(<2 x i16> %x) #0 { +; SSE-LABEL: sitofp_v2i16_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE-NEXT: psrad $16, %xmm0 +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v2i16_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i16(<2 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @uitofp_v2i16_v2f64(<2 x i16> %x) #0 { +; SSE-LABEL: uitofp_v2i16_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: uitofp_v2i16_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i16(<2 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @sitofp_v2i32_v2f64(<2 x i32> %x) #0 { +; SSE-LABEL: sitofp_v2i32_v2f64: +; SSE: # %bb.0: +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: sitofp_v2i32_v2f64: +; AVX: # %bb.0: +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @uitofp_v2i32_v2f64(<2 x i32> %x) #0 { +; SSE-32-LABEL: uitofp_v2i32_v2f64: +; SSE-32: # %bb.0: +; SSE-32-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] +; SSE-32-NEXT: pand %xmm0, %xmm1 +; SSE-32-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE-32-NEXT: psrld $16, %xmm0 +; SSE-32-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-32-NEXT: mulpd {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: addpd %xmm1, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: uitofp_v2i32_v2f64: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] +; SSE-64-NEXT: pand %xmm0, %xmm1 +; SSE-64-NEXT: cvtdq2pd %xmm1, %xmm1 +; SSE-64-NEXT: psrld $16, %xmm0 +; SSE-64-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-64-NEXT: mulpd {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: addpd %xmm1, %xmm0 +; SSE-64-NEXT: retq +; +; AVX1-32-LABEL: uitofp_v2i32_v2f64: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX1-32-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX1-32-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-32-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-32-NEXT: vmulpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v2i32_v2f64: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX1-64-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX1-64-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-64-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-64-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-LABEL: uitofp_v2i32_v2f64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 +; AVX512VL-NEXT: ret{{[l|q]}} +; +; AVX512DQ-LABEL: uitofp_v2i32_v2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 { +; SSE-32-LABEL: sitofp_v2i64_v2f64: +; SSE-32: # %bb.0: +; SSE-32-NEXT: pushl %ebp +; SSE-32-NEXT: .cfi_def_cfa_offset 8 +; SSE-32-NEXT: .cfi_offset %ebp, -8 +; SSE-32-NEXT: movl %esp, %ebp +; SSE-32-NEXT: .cfi_def_cfa_register %ebp +; SSE-32-NEXT: andl $-8, %esp +; SSE-32-NEXT: subl $32, %esp +; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) +; SSE-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-32-NEXT: movq %xmm0, {{[0-9]+}}(%esp) +; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) +; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) +; SSE-32-NEXT: fstpl (%esp) +; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; SSE-32-NEXT: movl %ebp, %esp +; SSE-32-NEXT: popl %ebp +; SSE-32-NEXT: .cfi_def_cfa %esp, 4 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: sitofp_v2i64_v2f64: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movq %xmm0, %rax +; SSE-64-NEXT: cvtsi2sd %rax, %xmm1 +; SSE-64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE-64-NEXT: movq %xmm0, %rax +; SSE-64-NEXT: xorps %xmm0, %xmm0 +; SSE-64-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-64-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] +; SSE-64-NEXT: movapd %xmm1, %xmm0 +; SSE-64-NEXT: retq +; +; AVX-32-LABEL: sitofp_v2i64_v2f64: +; AVX-32: # %bb.0: +; AVX-32-NEXT: pushl %ebp +; AVX-32-NEXT: .cfi_def_cfa_offset 8 +; AVX-32-NEXT: .cfi_offset %ebp, -8 +; AVX-32-NEXT: movl %esp, %ebp +; AVX-32-NEXT: .cfi_def_cfa_register %ebp +; AVX-32-NEXT: andl $-8, %esp +; AVX-32-NEXT: subl $32, %esp +; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl (%esp) +; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; AVX-32-NEXT: movl %ebp, %esp +; AVX-32-NEXT: popl %ebp +; AVX-32-NEXT: .cfi_def_cfa %esp, 4 +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: sitofp_v2i64_v2f64: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vpextrq $1, %xmm0, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 +; AVX-64-NEXT: vmovq %xmm0, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 +; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-64-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_v2i64_v2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +define <2 x double> @uitofp_v2i64_v2f64(<2 x i64> %x) #0 { +; SSE-32-LABEL: uitofp_v2i64_v2f64: +; SSE-32: # %bb.0: +; SSE-32-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,0,4294967295,0] +; SSE-32-NEXT: pand %xmm0, %xmm1 +; SSE-32-NEXT: por {{\.LCPI.*}}, %xmm1 +; SSE-32-NEXT: psrlq $32, %xmm0 +; SSE-32-NEXT: por {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: subpd {{\.LCPI.*}}, %xmm0 +; SSE-32-NEXT: addpd %xmm1, %xmm0 +; SSE-32-NEXT: retl +; +; SSE-64-LABEL: uitofp_v2i64_v2f64: +; SSE-64: # %bb.0: +; SSE-64-NEXT: movdqa {{.*#+}} xmm1 = [4294967295,4294967295] +; SSE-64-NEXT: pand %xmm0, %xmm1 +; SSE-64-NEXT: por {{.*}}(%rip), %xmm1 +; SSE-64-NEXT: psrlq $32, %xmm0 +; SSE-64-NEXT: por {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: subpd {{.*}}(%rip), %xmm0 +; SSE-64-NEXT: addpd %xmm1, %xmm0 +; SSE-64-NEXT: retq +; +; AVX1-32-LABEL: uitofp_v2i64_v2f64: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-32-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; AVX1-32-NEXT: vpor {{\.LCPI.*}}, %xmm1, %xmm1 +; AVX1-32-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX1-32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX1-32-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v2i64_v2f64: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-64-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] +; AVX1-64-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-64-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX1-64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-32-LABEL: uitofp_v2i64_v2f64: +; AVX512VL-32: # %bb.0: +; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm1 +; AVX512VL-32-NEXT: vpor {{\.LCPI.*}}, %xmm1, %xmm1 +; AVX512VL-32-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vpor {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vsubpd {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX512VL-32-NEXT: retl +; +; AVX512VL-64-LABEL: uitofp_v2i64_v2f64: +; AVX512VL-64: # %bb.0: +; AVX512VL-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1 +; AVX512VL-64-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 +; AVX512VL-64-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512VL-64-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-64-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-64-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX512VL-64-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_v2i64_v2f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <2 x double> %result +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll new file mode 100644 index 00000000000..8ab86920fd9 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -0,0 +1,421 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-32,AVX1-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX1,AVX-64,AVX1-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-32,AVX512VL-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX-64,AVX512VL-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX512DQ,AVX512DQ-64 + +declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata) + +define <8 x float> @sitofp_v8i1_v8f32(<8 x i1> %x) #0 { +; CHECK-LABEL: sitofp_v8i1_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vpslld $31, %ymm0, %ymm0 +; CHECK-NEXT: vpsrad $31, %ymm0, %ymm0 +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i1(<8 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @uitofp_v8i1_v8f32(<8 x i1> %x) #0 { +; AVX-32-LABEL: uitofp_v8i1_v8f32: +; AVX-32: # %bb.0: +; AVX-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-32-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: uitofp_v8i1_v8f32: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX-64-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX-64-NEXT: retq +; +; AVX512DQ-32-LABEL: uitofp_v8i1_v8f32: +; AVX512DQ-32: # %bb.0: +; AVX512DQ-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; AVX512DQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQ-32-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX512DQ-32-NEXT: retl +; +; AVX512DQ-64-LABEL: uitofp_v8i1_v8f32: +; AVX512DQ-64: # %bb.0: +; AVX512DQ-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512DQ-64-NEXT: vcvtdq2ps %ymm0, %ymm0 +; AVX512DQ-64-NEXT: retq + %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i1(<8 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @sitofp_v8i8_v8f32(<8 x i8> %x) #0 { +; CHECK-LABEL: sitofp_v8i8_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0 +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i8(<8 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @uitofp_v8i8_v8f32(<8 x i8> %x) #0 { +; CHECK-LABEL: uitofp_v8i8_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i8(<8 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @sitofp_v8i16_v8f32(<8 x i16> %x) #0 { +; CHECK-LABEL: sitofp_v8i16_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i16(<8 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @uitofp_v8i16_v8f32(<8 x i16> %x) #0 { +; CHECK-LABEL: uitofp_v8i16_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i16(<8 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @sitofp_v8i32_v8f32(<8 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v8i32_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtdq2ps %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i32(<8 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <8 x float> @uitofp_v8i32_v8f32(<8 x i32> %x) #0 { +; AVX1-LABEL: uitofp_v8i32_v8f32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpbroadcastd {{.*#+}} ymm1 = [1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200,1258291200] +; AVX1-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] +; AVX1-NEXT: vpsrld $16, %ymm0, %ymm0 +; AVX1-NEXT: vpbroadcastd {{.*#+}} ymm2 = [1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928,1392508928] +; AVX1-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11,-5.49764202E+11] +; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vaddps %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512VL-LABEL: uitofp_v8i32_v8f32: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0 +; AVX512VL-NEXT: ret{{[l|q]}} +; +; AVX512DQ-LABEL: uitofp_v8i32_v8f32: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i32(<8 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x float> %result +} + +define <4 x double> @sitofp_v4i1_v4f64(<4 x i1> %x) #0 { +; CHECK-LABEL: sitofp_v4i1_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpslld $31, %xmm0, %xmm0 +; CHECK-NEXT: vpsrad $31, %xmm0, %xmm0 +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i1(<4 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @uitofp_v4i1_v4f64(<4 x i1> %x) #0 { +; AVX1-LABEL: uitofp_v4i1_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512VL-32-LABEL: uitofp_v4i1_v4f64: +; AVX512VL-32: # %bb.0: +; AVX512VL-32-NEXT: vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 +; AVX512VL-32-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512VL-32-NEXT: retl +; +; AVX512VL-64-LABEL: uitofp_v4i1_v4f64: +; AVX512VL-64: # %bb.0: +; AVX512VL-64-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0 +; AVX512VL-64-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512VL-64-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_v4i1_v4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: vbroadcastss {{.*#+}} xmm1 = [1,1,1,1] +; AVX512DQ-NEXT: vandps %xmm1, %xmm0, %xmm0 +; AVX512DQ-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i1(<4 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @sitofp_v4i8_v4f64(<4 x i8> %x) #0 { +; CHECK-LABEL: sitofp_v4i8_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0 +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i8(<4 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @uitofp_v4i8_v4f64(<4 x i8> %x) #0 { +; CHECK-LABEL: uitofp_v4i8_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i8(<4 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @sitofp_v4i16_v4f64(<4 x i16> %x) #0 { +; CHECK-LABEL: sitofp_v4i16_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i16(<4 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @uitofp_v4i16_v4f64(<4 x i16> %x) #0 { +; CHECK-LABEL: uitofp_v4i16_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i16(<4 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @sitofp_v4i32_v4f64(<4 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v4i32_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtdq2pd %xmm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @uitofp_v4i32_v4f64(<4 x i32> %x) #0 { +; AVX1-LABEL: uitofp_v4i32_v4f64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm1 +; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] +; AVX1-NEXT: vmulpd %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 +; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512VL-LABEL: uitofp_v4i32_v4f64: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 +; AVX512VL-NEXT: ret{{[l|q]}} +; +; AVX512DQ-LABEL: uitofp_v4i32_v4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 { +; AVX-32-LABEL: sitofp_v4i64_v4f64: +; AVX-32: # %bb.0: +; AVX-32-NEXT: pushl %ebp +; AVX-32-NEXT: .cfi_def_cfa_offset 8 +; AVX-32-NEXT: .cfi_offset %ebp, -8 +; AVX-32-NEXT: movl %esp, %ebp +; AVX-32-NEXT: .cfi_def_cfa_register %ebp +; AVX-32-NEXT: andl $-8, %esp +; AVX-32-NEXT: subl $64, %esp +; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; AVX-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; AVX-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) +; AVX-32-NEXT: fstpl (%esp) +; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-32-NEXT: movl %ebp, %esp +; AVX-32-NEXT: popl %ebp +; AVX-32-NEXT: .cfi_def_cfa %esp, 4 +; AVX-32-NEXT: retl +; +; AVX-64-LABEL: sitofp_v4i64_v4f64: +; AVX-64: # %bb.0: +; AVX-64-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX-64-NEXT: vpextrq $1, %xmm1, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 +; AVX-64-NEXT: vmovq %xmm1, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 +; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX-64-NEXT: vpextrq $1, %xmm0, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 +; AVX-64-NEXT: vmovq %xmm0, %rax +; AVX-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 +; AVX-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX-64-NEXT: retq +; +; AVX512DQ-LABEL: sitofp_v4i64_v4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +define <4 x double> @uitofp_v4i64_v4f64(<4 x i64> %x) #0 { +; AVX1-32-LABEL: uitofp_v4i64_v4f64: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vpsrlq $32, %ymm0, %ymm1 +; AVX1-32-NEXT: vpor {{\.LCPI.*}}, %ymm1, %ymm1 +; AVX1-32-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] +; AVX1-32-NEXT: vsubpd %ymm2, %ymm1, %ymm1 +; AVX1-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1-32-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7] +; AVX1-32-NEXT: vpor {{\.LCPI.*}}, %ymm0, %ymm0 +; AVX1-32-NEXT: vaddpd %ymm1, %ymm0, %ymm0 +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: uitofp_v4i64_v4f64: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-64-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] +; AVX1-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] +; AVX1-64-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX1-64-NEXT: vpsrlq $32, %ymm0, %ymm0 +; AVX1-64-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] +; AVX1-64-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX1-64-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] +; AVX1-64-NEXT: vsubpd %ymm2, %ymm0, %ymm0 +; AVX1-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX1-64-NEXT: retq +; +; AVX512VL-32-LABEL: uitofp_v4i64_v4f64: +; AVX512VL-32: # %bb.0: +; AVX512VL-32-NEXT: vpand {{\.LCPI.*}}, %ymm0, %ymm1 +; AVX512VL-32-NEXT: vpor {{\.LCPI.*}}, %ymm1, %ymm1 +; AVX512VL-32-NEXT: vpsrlq $32, %ymm0, %ymm0 +; AVX512VL-32-NEXT: vpor {{\.LCPI.*}}, %ymm0, %ymm0 +; AVX512VL-32-NEXT: vsubpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0 +; AVX512VL-32-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX512VL-32-NEXT: retl +; +; AVX512VL-64-LABEL: uitofp_v4i64_v4f64: +; AVX512VL-64: # %bb.0: +; AVX512VL-64-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm1 +; AVX512VL-64-NEXT: vporq {{.*}}(%rip){1to4}, %ymm1, %ymm1 +; AVX512VL-64-NEXT: vpsrlq $32, %ymm0, %ymm0 +; AVX512VL-64-NEXT: vporq {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-64-NEXT: vsubpd {{.*}}(%rip){1to4}, %ymm0, %ymm0 +; AVX512VL-64-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX512VL-64-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_v4i64_v4f64: +; AVX512DQ: # %bb.0: +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: ret{{[l|q]}} + %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <4 x double> %result +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll new file mode 100644 index 00000000000..668d960ade7 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -0,0 +1,390 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,NODQ-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,NODQ-64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,DQ,DQ-32 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f,avx512dq -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,DQ,DQ-64 + +declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i1(<16 x i1>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i1(<16 x i1>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i8(<16 x i8>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i8(<16 x i8>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i32(<16 x i32>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i32(<16 x i32>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i1(<8 x i1>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i1(<8 x i1>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i8(<8 x i8>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i8(<8 x i8>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i16(<8 x i16>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i16(<8 x i16>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64>, metadata, metadata) + +define <16 x float> @sitofp_v16i1_v16f32(<16 x i1> %x) #0 { +; CHECK-LABEL: sitofp_v16i1_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vpslld $31, %zmm0, %zmm0 +; CHECK-NEXT: vpsrad $31, %zmm0, %zmm0 +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i1(<16 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @uitofp_v16i1_v16f32(<16 x i1> %x) #0 { +; NODQ-32-LABEL: uitofp_v16i1_v16f32: +; NODQ-32: # %bb.0: +; NODQ-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; NODQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; NODQ-32-NEXT: vcvtdq2ps %zmm0, %zmm0 +; NODQ-32-NEXT: retl +; +; NODQ-64-LABEL: uitofp_v16i1_v16f32: +; NODQ-64: # %bb.0: +; NODQ-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; NODQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; NODQ-64-NEXT: vcvtdq2ps %zmm0, %zmm0 +; NODQ-64-NEXT: retq +; +; DQ-32-LABEL: uitofp_v16i1_v16f32: +; DQ-32: # %bb.0: +; DQ-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; DQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; DQ-32-NEXT: vcvtdq2ps %zmm0, %zmm0 +; DQ-32-NEXT: retl +; +; DQ-64-LABEL: uitofp_v16i1_v16f32: +; DQ-64: # %bb.0: +; DQ-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; DQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; DQ-64-NEXT: vcvtdq2ps %zmm0, %zmm0 +; DQ-64-NEXT: retq + %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i1(<16 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @sitofp_v16i8_v16f32(<16 x i8> %x) #0 { +; CHECK-LABEL: sitofp_v16i8_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0 +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i8(<16 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @uitofp_v16i8_v16f32(<16 x i8> %x) #0 { +; CHECK-LABEL: uitofp_v16i8_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i8(<16 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @sitofp_v16i16_v16f32(<16 x i16> %x) #0 { +; CHECK-LABEL: sitofp_v16i16_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0 +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @uitofp_v16i16_v16f32(<16 x i16> %x) #0 { +; CHECK-LABEL: uitofp_v16i16_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @sitofp_v16i32_v16f32(<16 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v16i32_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i32(<16 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <16 x float> @uitofp_v16i32_v16f32(<16 x i32> %x) #0 { +; CHECK-LABEL: uitofp_v16i32_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i32(<16 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <16 x float> %result +} + +define <8 x double> @sitofp_v8i1_v8f64(<8 x i1> %x) #0 { +; CHECK-LABEL: sitofp_v8i1_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vpslld $31, %ymm0, %ymm0 +; CHECK-NEXT: vpsrad $31, %ymm0, %ymm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i1(<8 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @uitofp_v8i1_v8f64(<8 x i1> %x) #0 { +; NODQ-32-LABEL: uitofp_v8i1_v8f64: +; NODQ-32: # %bb.0: +; NODQ-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; NODQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; NODQ-32-NEXT: vcvtdq2pd %ymm0, %zmm0 +; NODQ-32-NEXT: retl +; +; NODQ-64-LABEL: uitofp_v8i1_v8f64: +; NODQ-64: # %bb.0: +; NODQ-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; NODQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; NODQ-64-NEXT: vcvtdq2pd %ymm0, %zmm0 +; NODQ-64-NEXT: retq +; +; DQ-32-LABEL: uitofp_v8i1_v8f64: +; DQ-32: # %bb.0: +; DQ-32-NEXT: vpand {{\.LCPI.*}}, %xmm0, %xmm0 +; DQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; DQ-32-NEXT: vcvtdq2pd %ymm0, %zmm0 +; DQ-32-NEXT: retl +; +; DQ-64-LABEL: uitofp_v8i1_v8f64: +; DQ-64: # %bb.0: +; DQ-64-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; DQ-64-NEXT: vcvtdq2pd %ymm0, %zmm0 +; DQ-64-NEXT: retq + %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i1(<8 x i1> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @sitofp_v8i8_v8f64(<8 x i8> %x) #0 { +; CHECK-LABEL: sitofp_v8i8_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i8(<8 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @uitofp_v8i8_v8f64(<8 x i8> %x) #0 { +; CHECK-LABEL: uitofp_v8i8_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i8(<8 x i8> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @sitofp_v8i16_v8f64(<8 x i16> %x) #0 { +; CHECK-LABEL: sitofp_v8i16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0 +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i16(<8 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @uitofp_v8i16_v8f64(<8 x i16> %x) #0 { +; CHECK-LABEL: uitofp_v8i16_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i16(<8 x i16> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @sitofp_v8i32_v8f64(<8 x i32> %x) #0 { +; CHECK-LABEL: sitofp_v8i32_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @uitofp_v8i32_v8f64(<8 x i32> %x) #0 { +; CHECK-LABEL: uitofp_v8i32_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 { +; NODQ-32-LABEL: sitofp_v8i64_v8f64: +; NODQ-32: # %bb.0: +; NODQ-32-NEXT: pushl %ebp +; NODQ-32-NEXT: .cfi_def_cfa_offset 8 +; NODQ-32-NEXT: .cfi_offset %ebp, -8 +; NODQ-32-NEXT: movl %esp, %ebp +; NODQ-32-NEXT: .cfi_def_cfa_register %ebp +; NODQ-32-NEXT: andl $-8, %esp +; NODQ-32-NEXT: subl $128, %esp +; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1 +; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] +; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1 +; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[2,3,0,1] +; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1] +; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm0 +; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,0,1] +; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl (%esp) +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) +; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] +; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; NODQ-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 +; NODQ-32-NEXT: movl %ebp, %esp +; NODQ-32-NEXT: popl %ebp +; NODQ-32-NEXT: .cfi_def_cfa %esp, 4 +; NODQ-32-NEXT: retl +; +; NODQ-64-LABEL: sitofp_v8i64_v8f64: +; NODQ-64: # %bb.0: +; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm1 +; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 +; NODQ-64-NEXT: vmovq %xmm1, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 +; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm2 +; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 +; NODQ-64-NEXT: vmovq %xmm2, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 +; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm2 +; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 +; NODQ-64-NEXT: vmovq %xmm2, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2 +; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 +; NODQ-64-NEXT: vmovq %xmm0, %rax +; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0 +; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; NODQ-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 +; NODQ-64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; NODQ-64-NEXT: retq +; +; DQ-LABEL: sitofp_v8i64_v8f64: +; DQ: # %bb.0: +; DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; DQ-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 { +; NODQ-32-LABEL: uitofp_v8i64_v8f64: +; NODQ-32: # %bb.0: +; NODQ-32-NEXT: vpandq {{\.LCPI.*}}, %zmm0, %zmm1 +; NODQ-32-NEXT: vporq {{\.LCPI.*}}, %zmm1, %zmm1 +; NODQ-32-NEXT: vpsrlq $32, %zmm0, %zmm0 +; NODQ-32-NEXT: vporq {{\.LCPI.*}}, %zmm0, %zmm0 +; NODQ-32-NEXT: vsubpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0 +; NODQ-32-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; NODQ-32-NEXT: retl +; +; NODQ-64-LABEL: uitofp_v8i64_v8f64: +; NODQ-64: # %bb.0: +; NODQ-64-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm1 +; NODQ-64-NEXT: vporq {{.*}}(%rip){1to8}, %zmm1, %zmm1 +; NODQ-64-NEXT: vpsrlq $32, %zmm0, %zmm0 +; NODQ-64-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; NODQ-64-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 +; NODQ-64-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; NODQ-64-NEXT: retq +; +; DQ-LABEL: uitofp_v8i64_v8f64: +; DQ: # %bb.0: +; DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; DQ-NEXT: ret{{[l|q]}} + %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %x, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret <8 x double> %result +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 29da69a979f..7d5f039b27f 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -6241,23 +6241,12 @@ entry: define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: cvtsi2sd %eax, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2sd %eax, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vextractps $1, %xmm0, %eax -; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1 -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 ; AVX-NEXT: retq entry: %result = call <2 x double> @@ -6309,14 +6298,31 @@ define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; CHECK-NEXT: movapd %xmm1, %xmm0 ; CHECK-NEXT: retq ; -; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i64: -; AVX: # %bb.0: # %entry -; AVX-NEXT: vpextrq $1, %xmm0, %rax -; AVX-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 -; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; AVX-NEXT: retq +; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 +; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 +; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: retq +; +; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq entry: %result = call <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, @@ -6520,22 +6526,10 @@ entry: define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: cvtsi2sd %eax, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; CHECK-NEXT: movd %xmm1, %eax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2sd %eax, %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] -; CHECK-NEXT: movd %xmm1, %eax -; CHECK-NEXT: cvtsi2sd %eax, %xmm3 +; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2sd %eax, %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm2, %xmm0 ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32: @@ -6605,21 +6599,28 @@ define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: constrained_vector_sitofp_v4f64_v4i64: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vpextrq $1, %xmm1, %rax -; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 -; AVX512-NEXT: vmovq %xmm1, %rax -; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512F-NEXT: vpextrq $1, %xmm1, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 +; AVX512F-NEXT: vmovq %xmm1, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 +; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: retq entry: %result = call <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x, @@ -6667,22 +6668,30 @@ define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; -; AVX512-LABEL: constrained_vector_sitofp_v4f32_v4i64: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512-NEXT: vmovq %xmm0, %rax -; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; AVX512-NEXT: vpextrq $1, %xmm0, %rax -; AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 -; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq entry: %result = call <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, @@ -6821,32 +6830,32 @@ entry: define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: cvtsi2sd %rax, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2sd %rax, %xmm0 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,65535,0,65535,0] +; CHECK-NEXT: pand %xmm0, %xmm1 +; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1 +; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 +; CHECK-NEXT: addpd %xmm1, %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vextractps $1, %xmm0, %eax -; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 -; AVX1-NEXT: vmovd %xmm0, %eax -; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 -; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX1-NEXT: vcvtdq2pd %xmm1, %xmm1 +; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 +; AVX1-NEXT: vmulpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextractps $1, %xmm0, %eax -; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1 -; AVX512-NEXT: vmovd %xmm0, %eax -; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm0 -; AVX512-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: %result = call <2 x double> @@ -6917,16 +6926,24 @@ define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { ; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i64: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vpsrlq $32, %xmm0, %xmm0 -; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpsrlq $32, %xmm0, %xmm0 +; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512DQ-NEXT: vzeroupper +; AVX512DQ-NEXT: retq entry: %result = call <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, @@ -7300,22 +7317,22 @@ entry: define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: cvtsi2sd %rax, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] -; CHECK-NEXT: movd %xmm1, %eax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2sd %rax, %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,1,2,3] -; CHECK-NEXT: movd %xmm1, %eax -; CHECK-NEXT: cvtsi2sd %rax, %xmm3 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] -; CHECK-NEXT: movd %xmm0, %eax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2sd %rax, %xmm1 -; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] -; CHECK-NEXT: movapd %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm1 +; CHECK-NEXT: psrld $16, %xmm1 +; CHECK-NEXT: cvtdq2pd %xmm1, %xmm1 +; CHECK-NEXT: movapd {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4] +; CHECK-NEXT: mulpd %xmm2, %xmm1 +; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1] +; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 +; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 +; CHECK-NEXT: addpd %xmm1, %xmm0 +; CHECK-NEXT: movdqa %xmm3, %xmm1 +; CHECK-NEXT: psrld $16, %xmm1 +; CHECK-NEXT: cvtdq2pd %xmm1, %xmm4 +; CHECK-NEXT: mulpd %xmm2, %xmm4 +; CHECK-NEXT: pand {{.*}}(%rip), %xmm3 +; CHECK-NEXT: cvtdq2pd %xmm3, %xmm1 +; CHECK-NEXT: addpd %xmm4, %xmm1 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32: @@ -7331,14 +7348,9 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { ; ; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1 -; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] -; AVX512-NEXT: vmulpd %ymm2, %ymm1, %ymm1 -; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; AVX512-NEXT: vcvtdq2pd %xmm0, %ymm0 -; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq entry: %result = call <4 x double> @@ -7351,36 +7363,30 @@ entry: define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrld $16, %xmm1 -; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK-NEXT: mulps {{.*}}(%rip), %xmm1 -; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 -; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] +; CHECK-NEXT: pand %xmm0, %xmm1 +; CHECK-NEXT: por {{.*}}(%rip), %xmm1 +; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: por {{.*}}(%rip), %xmm0 +; CHECK-NEXT: addps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: addps %xmm1, %xmm0 ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 -; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1 -; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] +; AVX1-NEXT: vaddps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1 -; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [6.5536E+4,6.5536E+4,6.5536E+4,6.5536E+4] -; AVX512-NEXT: vmulps %xmm2, %xmm1, %xmm1 -; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] -; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 +; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 +; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq entry: %result = call <4 x float> @@ -7426,19 +7432,26 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { ; AVX1-NEXT: vaddpd %ymm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i64: -; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] -; AVX512-NEXT: vpor %ymm2, %ymm1, %ymm1 -; AVX512-NEXT: vpsrlq $32, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] -; AVX512-NEXT: vpor %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] -; AVX512-NEXT: vsubpd %ymm2, %ymm0, %ymm0 -; AVX512-NEXT: vaddpd %ymm0, %ymm1, %ymm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] +; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] +; AVX512F-NEXT: vpor %ymm2, %ymm1, %ymm1 +; AVX512F-NEXT: vpsrlq $32, %ymm0, %ymm0 +; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm2 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] +; AVX512F-NEXT: vpor %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm2 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] +; AVX512F-NEXT: vsubpd %ymm2, %ymm0, %ymm0 +; AVX512F-NEXT: vaddpd %ymm0, %ymm1, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64: +; AVX512DQ: # %bb.0: # %entry +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 +; AVX512DQ-NEXT: retq entry: %result = call <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x, @@ -7592,28 +7605,9 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] -; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3 -; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] -; AVX512DQ-NEXT: vaddps %xmm0, %xmm0, %xmm2 -; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 -; AVX512DQ-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 +; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq entry: |

