Diffstat (limited to 'llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll | 1193
1 file changed, 772 insertions, 421 deletions
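The hunks below update the expected code for the constrained fptoui tests: instead of subtracting zero before cvttss2si/cvttsd2si, the new CHECK lines expect a comiss/comisd against 2^63, a conditional subtract of that threshold, and a sign-bit fixup via setbe, shlq $63 and xorq. The following is a minimal C sketch of the value computation those sequences implement; it is illustrative only, ignores the strict exception semantics that the constrained intrinsics carry, and the helper name is not from the test file.

    #include <stdint.h>

    /* Illustrative sketch of the float -> uint64_t pattern in the new CHECK lines. */
    static uint64_t fptoui64_sketch(float x) {
        const float two63 = 9223372036854775808.0f; /* 2^63, the comiss threshold */
        int big = x >= two63;                       /* comiss + ja / setbe        */
        float adj = big ? x - two63 : x;            /* conditional subss          */
        uint64_t lo = (uint64_t)(int64_t)adj;       /* signed cvttss2si           */
        return lo ^ ((uint64_t)big << 63);          /* movzbl + shlq $63 + xorq   */
    }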
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index 40ff465a9dd..cca16cc0d70 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -4465,18 +4465,38 @@ entry: define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %rax +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm0, %xmm2 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: ja .LBB115_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: .LBB115_2: # %entry +; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vcvttss2si %xmm0, %rax +; AVX1-NEXT: vcomiss %xmm0, %xmm1 +; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: ja .LBB115_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: .LBB115_2: # %entry +; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vcvttss2si %xmm0, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32: @@ -4493,30 +4513,70 @@ entry: define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %rax -; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm2, %xmm1 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB116_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: .LBB116_2: # %entry +; CHECK-NEXT: subss %xmm3, %xmm2 ; CHECK-NEXT: cvttss2si %xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm3, %xmm1 +; CHECK-NEXT: ja .LBB116_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: .LBB116_4: # %entry +; CHECK-NEXT: subss %xmm0, %xmm3 +; CHECK-NEXT: cvttss2si %xmm3, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: 
constrained_vector_fptoui_v2i64_v2f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttss2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm2, %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB116_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm3 +; AVX1-NEXT: .LBB116_2: # %entry +; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm3, %xmm0 +; AVX1-NEXT: ja .LBB116_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: .LBB116_4: # %entry +; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32: @@ -4537,35 +4597,95 @@ entry: define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %rax -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %rdx +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm0, %xmm1 -; CHECK-NEXT: cvttss2si %xmm1, %rcx +; CHECK-NEXT: comiss %xmm2, %xmm1 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB117_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: .LBB117_2: # %entry +; CHECK-NEXT: subss %xmm3, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm2, %xmm1 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB117_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movaps %xmm1, %xmm3 +; CHECK-NEXT: .LBB117_4: # %entry +; CHECK-NEXT: subss %xmm3, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rcx +; CHECK-NEXT: setbe %dl +; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: shlq $63, %rdx +; CHECK-NEXT: xorq %rcx, %rdx +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm2, %xmm1 +; CHECK-NEXT: ja .LBB117_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: .LBB117_6: # %entry +; CHECK-NEXT: subss %xmm0, %xmm2 +; CHECK-NEXT: cvttss2si %xmm2, %rsi +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rsi, %rcx ; CHECK-NEXT: retq ; ; AVX1-LABEL: 
constrained_vector_fptoui_v3i64_v3f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttss2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm2, %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB117_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm3 +; AVX1-NEXT: .LBB117_2: # %entry +; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttss2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm3, %xmm0 +; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: ja .LBB117_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm4 +; AVX1-NEXT: .LBB117_4: # %entry +; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vcvttss2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm3, %xmm0 +; AVX1-NEXT: ja .LBB117_6 +; AVX1-NEXT: # %bb.5: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: .LBB117_6: # %entry +; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32: @@ -4590,49 +4710,129 @@ entry: define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorps %xmm1, %xmm1 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm1, %xmm0 -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm2 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm0, %xmm2 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB118_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movaps %xmm2, %xmm3 +; CHECK-NEXT: .LBB118_2: # %entry +; CHECK-NEXT: subss %xmm3, %xmm0 +; CHECK-NEXT: cvttss2si %xmm0, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm1, %xmm0 +; CHECK-NEXT: comiss %xmm0, %xmm2 +; CHECK-NEXT: xorps %xmm4, %xmm4 +; CHECK-NEXT: ja .LBB118_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movaps %xmm2, %xmm4 +; CHECK-NEXT: .LBB118_4: # %entry +; CHECK-NEXT: movq %rax, %xmm3 +; CHECK-NEXT: subss %xmm4, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: movq 
%rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm1, %xmm2 -; CHECK-NEXT: cvttss2si %xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: subss %xmm1, %xmm3 -; CHECK-NEXT: cvttss2si %xmm3, %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm4, %xmm2 +; CHECK-NEXT: xorps %xmm5, %xmm5 +; CHECK-NEXT: ja .LBB118_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: movaps %xmm2, %xmm5 +; CHECK-NEXT: .LBB118_6: # %entry +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; CHECK-NEXT: subss %xmm5, %xmm4 +; CHECK-NEXT: cvttss2si %xmm4, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm3 +; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: comiss %xmm4, %xmm2 +; CHECK-NEXT: ja .LBB118_8 +; CHECK-NEXT: # %bb.7: # %entry +; CHECK-NEXT: movaps %xmm2, %xmm1 +; CHECK-NEXT: .LBB118_8: # %entry +; CHECK-NEXT: subss %xmm1, %xmm4 +; CHECK-NEXT: cvttss2si %xmm4, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttss2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 -; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vcvttss2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] ; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vcvttss2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm2, %xmm0 +; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB118_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm3 +; AVX1-NEXT: .LBB118_2: # %entry +; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttss2si %xmm2, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax ; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; AVX1-NEXT: vsubss %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vcomiss %xmm3, %xmm0 +; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: ja .LBB118_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm4 +; AVX1-NEXT: .LBB118_4: # %entry +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vcvttss2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm4, %xmm0 +; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: 
ja .LBB118_6 +; AVX1-NEXT: # %bb.5: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm5 +; AVX1-NEXT: .LBB118_6: # %entry +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3 +; AVX1-NEXT: vcvttss2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; AVX1-NEXT: vcomiss %xmm4, %xmm0 +; AVX1-NEXT: ja .LBB118_8 +; AVX1-NEXT: # %bb.7: # %entry +; AVX1-NEXT: vmovaps %xmm0, %xmm1 +; AVX1-NEXT: .LBB118_8: # %entry +; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0 ; AVX1-NEXT: vcvttss2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32: @@ -4810,18 +5010,38 @@ entry: define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm0, %xmm0 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: cvttsd2si %xmm1, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: comisd %xmm0, %xmm2 +; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: ja .LBB123_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movapd %xmm2, %xmm1 +; CHECK-NEXT: .LBB123_2: # %entry +; CHECK-NEXT: subsd %xmm1, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vcomisd %xmm0, %xmm1 +; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: ja .LBB123_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovapd %xmm1, %xmm2 +; AVX1-NEXT: .LBB123_2: # %entry +; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vcvttsd2si %xmm0, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64: @@ -4838,30 +5058,70 @@ entry: define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm0, %xmm0 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: cvttsd2si %xmm1, %rax -; CHECK-NEXT: movq %rax, %xmm1 ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: comisd %xmm2, %xmm1 +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: xorpd %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB124_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movapd %xmm1, %xmm3 +; CHECK-NEXT: .LBB124_2: # %entry +; CHECK-NEXT: subsd %xmm3, %xmm2 ; CHECK-NEXT: cvttsd2si 
%xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm2 +; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero +; CHECK-NEXT: comisd %xmm3, %xmm1 +; CHECK-NEXT: ja .LBB124_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: .LBB124_4: # %entry +; CHECK-NEXT: subsd %xmm0, %xmm3 +; CHECK-NEXT: cvttsd2si %xmm3, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm0 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttsd2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm2, %xmm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB124_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm3 +; AVX1-NEXT: .LBB124_2: # %entry +; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm3, %xmm0 +; AVX1-NEXT: ja .LBB124_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm1 +; AVX1-NEXT: .LBB124_4: # %entry +; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; AVX1-NEXT: retq ; ; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64: @@ -4890,35 +5150,95 @@ entry: define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm0, %xmm0 -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: cvttsd2si %xmm1, %rax -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: cvttsd2si %xmm1, %rdx +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: subsd %xmm0, %xmm1 -; CHECK-NEXT: cvttsd2si %xmm1, %rcx +; CHECK-NEXT: comisd %xmm2, %xmm1 +; CHECK-NEXT: xorpd %xmm0, %xmm0 +; CHECK-NEXT: xorpd %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB125_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movapd %xmm1, %xmm3 +; CHECK-NEXT: .LBB125_2: # %entry +; CHECK-NEXT: subsd %xmm3, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: comisd %xmm2, %xmm1 +; CHECK-NEXT: xorpd %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB125_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movapd 
%xmm1, %xmm3 +; CHECK-NEXT: .LBB125_4: # %entry +; CHECK-NEXT: subsd %xmm3, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rcx +; CHECK-NEXT: setbe %dl +; CHECK-NEXT: movzbl %dl, %edx +; CHECK-NEXT: shlq $63, %rdx +; CHECK-NEXT: xorq %rcx, %rdx +; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: comisd %xmm2, %xmm1 +; CHECK-NEXT: ja .LBB125_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: movapd %xmm1, %xmm0 +; CHECK-NEXT: .LBB125_6: # %entry +; CHECK-NEXT: subsd %xmm0, %xmm2 +; CHECK-NEXT: cvttsd2si %xmm2, %rsi +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rsi, %rcx ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttsd2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm2, %xmm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB125_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm3 +; AVX1-NEXT: .LBB125_2: # %entry +; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vcvttsd2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm2 +; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm3, %xmm0 +; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: ja .LBB125_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm4 +; AVX1-NEXT: .LBB125_4: # %entry +; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vcvttsd2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm3, %xmm0 +; AVX1-NEXT: ja .LBB125_6 +; AVX1-NEXT: # %bb.5: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm1 +; AVX1-NEXT: .LBB125_6: # %entry +; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64: @@ -4943,49 +5263,129 @@ entry: define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { ; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: subsd %xmm1, %xmm0 -; CHECK-NEXT: cvttsd2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: subsd %xmm1, %xmm0 -; CHECK-NEXT: cvttsd2si %xmm0, %rax -; CHECK-NEXT: movq %rax, %xmm0 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] ; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: subsd %xmm1, 
%xmm2 -; CHECK-NEXT: cvttsd2si %xmm2, %rax -; CHECK-NEXT: movq %rax, %xmm2 -; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero -; CHECK-NEXT: subsd %xmm1, %xmm3 -; CHECK-NEXT: cvttsd2si %xmm3, %rax -; CHECK-NEXT: movq %rax, %xmm1 -; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: comisd %xmm0, %xmm2 +; CHECK-NEXT: xorpd %xmm1, %xmm1 +; CHECK-NEXT: xorpd %xmm3, %xmm3 +; CHECK-NEXT: ja .LBB126_2 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: movapd %xmm2, %xmm3 +; CHECK-NEXT: .LBB126_2: # %entry +; CHECK-NEXT: subsd %xmm3, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rcx +; CHECK-NEXT: setbe %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: shlq $63, %rax +; CHECK-NEXT: xorq %rcx, %rax +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: comisd %xmm0, %xmm2 +; CHECK-NEXT: xorpd %xmm4, %xmm4 +; CHECK-NEXT: ja .LBB126_4 +; CHECK-NEXT: # %bb.3: # %entry +; CHECK-NEXT: movapd %xmm2, %xmm4 +; CHECK-NEXT: .LBB126_4: # %entry +; CHECK-NEXT: movq %rax, %xmm3 +; CHECK-NEXT: subsd %xmm4, %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm0 +; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero +; CHECK-NEXT: comisd %xmm4, %xmm2 +; CHECK-NEXT: xorpd %xmm5, %xmm5 +; CHECK-NEXT: ja .LBB126_6 +; CHECK-NEXT: # %bb.5: # %entry +; CHECK-NEXT: movapd %xmm2, %xmm5 +; CHECK-NEXT: .LBB126_6: # %entry +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; CHECK-NEXT: subsd %xmm5, %xmm4 +; CHECK-NEXT: cvttsd2si %xmm4, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm3 +; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero +; CHECK-NEXT: comisd %xmm4, %xmm2 +; CHECK-NEXT: ja .LBB126_8 +; CHECK-NEXT: # %bb.7: # %entry +; CHECK-NEXT: movapd %xmm2, %xmm1 +; CHECK-NEXT: .LBB126_8: # %entry +; CHECK-NEXT: subsd %xmm1, %xmm4 +; CHECK-NEXT: cvttsd2si %xmm4, %rax +; CHECK-NEXT: setbe %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: shlq $63, %rcx +; CHECK-NEXT: xorq %rax, %rcx +; CHECK-NEXT: movq %rcx, %xmm1 +; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1 -; AVX1-NEXT: vcvttsd2si %xmm1, %rax -; AVX1-NEXT: vmovq %rax, %xmm1 ; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vcvttsd2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] -; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2 -; AVX1-NEXT: vcvttsd2si %xmm2, %rax -; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm2, %xmm0 +; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: ja .LBB126_2 +; AVX1-NEXT: # %bb.1: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm3 +; AVX1-NEXT: .LBB126_2: # %entry +; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcvttsd2si %xmm2, %rcx +; AVX1-NEXT: setbe %al +; AVX1-NEXT: movzbl %al, %eax +; AVX1-NEXT: shlq $63, %rax +; AVX1-NEXT: xorq %rcx, %rax ; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; AVX1-NEXT: vsubsd %xmm0, %xmm3, %xmm0 +; AVX1-NEXT: vcomisd %xmm3, %xmm0 +; AVX1-NEXT: vxorpd %xmm4, %xmm4, 
%xmm4 +; AVX1-NEXT: ja .LBB126_4 +; AVX1-NEXT: # %bb.3: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm4 +; AVX1-NEXT: .LBB126_4: # %entry +; AVX1-NEXT: vmovq %rax, %xmm2 +; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 +; AVX1-NEXT: vcvttsd2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm4, %xmm0 +; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5 +; AVX1-NEXT: ja .LBB126_6 +; AVX1-NEXT: # %bb.5: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm5 +; AVX1-NEXT: .LBB126_6: # %entry +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] +; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3 +; AVX1-NEXT: vcvttsd2si %xmm3, %rax +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm3 +; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero +; AVX1-NEXT: vcomisd %xmm4, %xmm0 +; AVX1-NEXT: ja .LBB126_8 +; AVX1-NEXT: # %bb.7: # %entry +; AVX1-NEXT: vmovapd %xmm0, %xmm1 +; AVX1-NEXT: .LBB126_8: # %entry +; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0 ; AVX1-NEXT: vcvttsd2si %xmm0, %rax -; AVX1-NEXT: vmovq %rax, %xmm0 -; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: setbe %cl +; AVX1-NEXT: movzbl %cl, %ecx +; AVX1-NEXT: shlq $63, %rcx +; AVX1-NEXT: xorq %rax, %rcx +; AVX1-NEXT: vmovq %rcx, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64: @@ -6384,34 +6784,34 @@ entry: define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: js .LBB170_1 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB170_1: ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: shrq %rax -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: orq %rax, %rcx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rcx +; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 +; CHECK-NEXT: jns .LBB170_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm0, %xmm0 +; CHECK-NEXT: .LBB170_2: # %entry ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: testq %rdi, %rdi -; AVX1-NEXT: js .LBB170_1 -; AVX1-NEXT: # %bb.2: # %entry -; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB170_1: ; AVX1-NEXT: movq %rdi, %rax ; AVX1-NEXT: shrq %rax -; AVX1-NEXT: andl $1, %edi -; AVX1-NEXT: orq %rax, %rdi -; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 +; AVX1-NEXT: movl %edi, %ecx +; AVX1-NEXT: andl $1, %ecx +; AVX1-NEXT: orq %rax, %rcx +; AVX1-NEXT: testq %rdi, %rdi +; AVX1-NEXT: cmovnsq %rdi, %rcx +; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0 +; AVX1-NEXT: jns .LBB170_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: .LBB170_2: # %entry ; AVX1-NEXT: retq ; ; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64: @@ -6548,74 +6948,65 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: movq %xmm0, %rax -; CHECK-NEXT: testq %rax, 
%rax -; CHECK-NEXT: js .LBB174_1 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ss %rax, %xmm0 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; CHECK-NEXT: movq %xmm1, %rax -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jns .LBB174_5 -; CHECK-NEXT: .LBB174_4: ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB174_1: -; CHECK-NEXT: movq %rax, %rcx -; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ss %rax, %xmm0 +; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 +; CHECK-NEXT: jns .LBB174_2 +; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: addss %xmm0, %xmm0 +; CHECK-NEXT: .LBB174_2: # %entry ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; CHECK-NEXT: movq %xmm1, %rax +; CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: shrq %rcx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: js .LBB174_4 -; CHECK-NEXT: .LBB174_5: # %entry +; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rax, %xmm1 +; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 +; CHECK-NEXT: jns .LBB174_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: addss %xmm1, %xmm1 +; CHECK-NEXT: .LBB174_4: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpextrq $1, %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB174_1 -; AVX1-NEXT: # %bb.2: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: jns .LBB174_5 -; AVX1-NEXT: .LBB174_4: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB174_1: -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 +; AVX1-NEXT: jns .LBB174_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: .LBB174_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB174_4 -; AVX1-NEXT: .LBB174_5: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm0 +; AVX1-NEXT: jns .LBB174_4 +; AVX1-NEXT: # %bb.3: +; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: .LBB174_4: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; AVX1-NEXT: retq ; @@ 
-6805,100 +7196,90 @@ entry: define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: shrq %rax +; CHECK-NEXT: movl %esi, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: orq %rax, %rcx ; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: js .LBB178_1 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: cvtsi2ss %rsi, %xmm1 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: jns .LBB178_5 -; CHECK-NEXT: .LBB178_4: +; CHECK-NEXT: cmovnsq %rsi, %rcx +; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 +; CHECK-NEXT: jns .LBB178_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addss %xmm1, %xmm1 +; CHECK-NEXT: .LBB178_2: # %entry ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: shrq %rax -; CHECK-NEXT: andl $1, %edi -; CHECK-NEXT: orq %rax, %rdi -; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: orq %rax, %rcx +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: cmovnsq %rdi, %rcx +; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 +; CHECK-NEXT: jns .LBB178_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm0, %xmm0 +; CHECK-NEXT: .LBB178_4: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: jns .LBB178_8 -; CHECK-NEXT: .LBB178_7: ; CHECK-NEXT: movq %rdx, %rax ; CHECK-NEXT: shrq %rax -; CHECK-NEXT: andl $1, %edx -; CHECK-NEXT: orq %rax, %rdx -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 -; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB178_1: -; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: shrq %rax -; CHECK-NEXT: andl $1, %esi -; CHECK-NEXT: orq %rax, %rsi -; CHECK-NEXT: cvtsi2ss %rsi, %xmm1 -; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: js .LBB178_4 -; CHECK-NEXT: .LBB178_5: # %entry -; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 -; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: andl $1, %ecx +; CHECK-NEXT: orq %rax, %rcx ; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: js .LBB178_7 -; CHECK-NEXT: .LBB178_8: # %entry +; CHECK-NEXT: cmovnsq %rdx, %rcx ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 +; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 +; CHECK-NEXT: jns .LBB178_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: addss %xmm1, %xmm1 +; CHECK-NEXT: .LBB178_6: # %entry ; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpextrq $1, %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB178_1 -; AVX1-NEXT: # %bb.2: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: jns .LBB178_5 -; AVX1-NEXT: .LBB178_4: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: jmp .LBB178_6 -; AVX1-NEXT: .LBB178_1: -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 +; 
AVX1-NEXT: jns .LBB178_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: .LBB178_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB178_4 -; AVX1-NEXT: .LBB178_5: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX1-NEXT: .LBB178_6: # %entry +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2 +; AVX1-NEXT: jns .LBB178_4 +; AVX1-NEXT: # %bb.3: +; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: .LBB178_4: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB178_7 -; AVX1-NEXT: # %bb.8: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB178_7: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0 +; AVX1-NEXT: jns .LBB178_6 +; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: .LBB178_6: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -6949,10 +7330,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 ; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0 ; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1 ; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; @@ -6978,12 +7359,12 @@ entry: define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535] -; CHECK-NEXT: andps %xmm0, %xmm1 +; CHECK-NEXT: movdqa %xmm0, %xmm1 +; CHECK-NEXT: psrld $16, %xmm1 ; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 -; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: mulps {{.*}}(%rip), %xmm1 +; CHECK-NEXT: pand {{.*}}(%rip), %xmm0 ; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 -; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0 ; CHECK-NEXT: addps %xmm1, %xmm0 ; CHECK-NEXT: retq ; @@ -6991,10 +7372,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1 ; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 ; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 ; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1 ; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; @@ -7078,73 +7459,62 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %xmm1, %rax +; 
CHECK-NEXT: movq %rax, %rcx +; CHECK-NEXT: shrq %rcx +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: js .LBB182_1 -; CHECK-NEXT: # %bb.2: # %entry -; CHECK-NEXT: cvtsi2ss %rax, %xmm2 +; CHECK-NEXT: cmovnsq %rax, %rdx +; CHECK-NEXT: cvtsi2ss %rdx, %xmm2 +; CHECK-NEXT: jns .LBB182_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addss %xmm2, %xmm2 +; CHECK-NEXT: .LBB182_2: # %entry ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; CHECK-NEXT: movq %xmm1, %rax -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jns .LBB182_5 -; CHECK-NEXT: .LBB182_4: ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: cvtsi2ss %rax, %xmm3 +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmovnsq %rax, %rdx +; CHECK-NEXT: cvtsi2ss %rdx, %xmm3 +; CHECK-NEXT: jns .LBB182_4 +; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: addss %xmm3, %xmm3 +; CHECK-NEXT: .LBB182_4: # %entry ; CHECK-NEXT: movq %xmm0, %rax -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jns .LBB182_8 -; CHECK-NEXT: .LBB182_7: ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK-NEXT: addss %xmm1, %xmm1 -; CHECK-NEXT: jmp .LBB182_9 -; CHECK-NEXT: .LBB182_1: -; CHECK-NEXT: movq %rax, %rcx -; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax -; CHECK-NEXT: cvtsi2ss %rax, %xmm2 -; CHECK-NEXT: addss %xmm2, %xmm2 -; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; CHECK-NEXT: movq %xmm1, %rax -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: js .LBB182_4 -; CHECK-NEXT: .LBB182_5: # %entry -; CHECK-NEXT: cvtsi2ss %rax, %xmm3 -; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: js .LBB182_7 -; CHECK-NEXT: .LBB182_8: # %entry +; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm1, %xmm1 -; CHECK-NEXT: cvtsi2ss %rax, %xmm1 -; CHECK-NEXT: .LBB182_9: # %entry +; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 +; CHECK-NEXT: jns .LBB182_6 +; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: addss %xmm1, %xmm1 +; CHECK-NEXT: .LBB182_6: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: movq %xmm0, %rax -; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: js .LBB182_10 -; CHECK-NEXT: # %bb.11: # %entry -; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ss %rax, %xmm0 -; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; CHECK-NEXT: movaps %xmm1, %xmm0 -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB182_10: ; CHECK-NEXT: movq %rax, %rcx ; CHECK-NEXT: shrq %rcx -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: orq %rcx, %rax +; CHECK-NEXT: movl %eax, %edx +; CHECK-NEXT: andl $1, %edx +; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: testq %rax, %rax +; CHECK-NEXT: cmovnsq %rax, %rdx ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: cvtsi2ss %rax, %xmm0 +; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 +; CHECK-NEXT: jns .LBB182_8 +; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: addss %xmm0, %xmm0 +; CHECK-NEXT: .LBB182_8: # %entry ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] ; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; 
CHECK-NEXT: movaps %xmm1, %xmm0 @@ -7153,68 +7523,60 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: vpextrq $1, %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB182_1 -; AVX1-NEXT: # %bb.2: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: jns .LBB182_5 -; AVX1-NEXT: .LBB182_4: -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: jmp .LBB182_6 -; AVX1-NEXT: .LBB182_1: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 +; AVX1-NEXT: jns .LBB182_2 +; AVX1-NEXT: # %bb.1: ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; AVX1-NEXT: .LBB182_2: # %entry ; AVX1-NEXT: vmovq %xmm0, %rax +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB182_4 -; AVX1-NEXT: .LBB182_5: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX1-NEXT: .LBB182_6: # %entry +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2 +; AVX1-NEXT: jns .LBB182_4 +; AVX1-NEXT: # %bb.3: +; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: .LBB182_4: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB182_7 -; AVX1-NEXT: # %bb.8: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 -; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] -; AVX1-NEXT: vpextrq $1, %xmm0, %rax -; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: jns .LBB182_11 -; AVX1-NEXT: .LBB182_10: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 -; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; AVX1-NEXT: .LBB182_7: -; AVX1-NEXT: movq %rax, %rcx -; AVX1-NEXT: shrq %rcx -; AVX1-NEXT: andl $1, %eax -; AVX1-NEXT: orq %rcx, %rax -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx +; AVX1-NEXT: testq %rax, %rax +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2 +; AVX1-NEXT: jns .LBB182_6 +; AVX1-NEXT: # %bb.5: ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: .LBB182_6: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax +; AVX1-NEXT: movq %rax, %rcx +; AVX1-NEXT: shrq %rcx +; AVX1-NEXT: movl %eax, %edx +; AVX1-NEXT: andl $1, %edx +; AVX1-NEXT: orq %rcx, %rdx ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB182_10 -; AVX1-NEXT: .LBB182_11: # %entry -; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 +; AVX1-NEXT: cmovnsq %rax, %rdx +; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0 +; AVX1-NEXT: jns .LBB182_8 +; AVX1-NEXT: # %bb.7: +; AVX1-NEXT: vaddss %xmm0, 
%xmm0, %xmm0 +; AVX1-NEXT: .LBB182_8: # %entry ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -7238,39 +7600,28 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { ; ; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64: ; AVX512DQ: # %bb.0: # %entry -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2 -; AVX512DQ-NEXT: vmovq %xmm2, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] -; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] +; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1] ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2 ; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3 ; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax +; AVX512DQ-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2 +; AVX512DQ-NEXT: vmovq %xmm0, %rax ; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 -; AVX512DQ-NEXT: vmovq %xmm2, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] -; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2 -; AVX512DQ-NEXT: vmovq %xmm2, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] -; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax -; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0] -; AVX512DQ-NEXT: vaddps %xmm2, %xmm2, %xmm2 -; AVX512DQ-NEXT: vxorps %xmm3, %xmm3, %xmm3 -; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0 -; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512DQ-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] +; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512DQ-NEXT: vmovq %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] +; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax +; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0 +; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] +; AVX512DQ-NEXT: vaddps %xmm0, %xmm0, %xmm2 +; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1 +; AVX512DQ-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper ; AVX512DQ-NEXT: retq entry: |
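The uitofp hunks make the analogous change in the other direction: the branchy lowering (js to a separate block that performs the shift/and/or adjustment) is replaced by a straight-line cmovnsq select followed by an optional addss doubling, and the AVX512DQ path now blends the adjusted inputs with vblendvpd before converting, so only one set of cvtsi2ss conversions is needed. A hedged C sketch of the scalar uint64 -> float computation the new CHECK lines expect (again illustrative; the name is not from the test file):

    #include <stdint.h>

    /* Illustrative sketch of the uint64_t -> float pattern in the new CHECK lines. */
    static float uitofp32_sketch(uint64_t x) {
        uint64_t halved = (x >> 1) | (x & 1);         /* shrq / andl $1 / orq */
        uint64_t in = ((int64_t)x >= 0) ? x : halved; /* testq + cmovnsq      */
        float f = (float)(int64_t)in;                 /* signed cvtsi2ss      */
        if ((int64_t)x < 0)                           /* jns skips doubling   */
            f += f;                                   /* addss %xmm0, %xmm0   */
        return f;
    }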