Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll            |    6
-rw-r--r--  llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll            |    6
-rw-r--r--  llvm/test/CodeGen/X86/fp-intrinsics-flags.ll              |   26
-rw-r--r--  llvm/test/CodeGen/X86/fp-intrinsics.ll                    |  124
-rw-r--r--  llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll         |  224
-rw-r--r--  llvm/test/CodeGen/X86/fp80-strict-scalar.ll               |    4
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll           |  719
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll           |  210
-rw-r--r--  llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll           |   32
-rw-r--r--  llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll | 1193
10 files changed, 1510 insertions, 1034 deletions
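
The diffs below track a change in how strict floating-point conversions are lowered: quiet compares become signaling compares (ucomisd -> comisd, fucom -> fcom, cebr -> kebr), and the mask-based select feeding the subtraction becomes a real branch, so the lowering raises exactly the invalid exceptions the constrained semantics require. For orientation, a minimal sketch of the kind of IR these tests compile (function name and layout are illustrative, not copied from any one test):

; Strict fp-to-unsigned conversion: the call and the enclosing function
; both carry the strictfp attribute, and "fpexcept.strict" forbids the
; backend from adding or dropping FP exceptions.
declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)

define i32 @to_u32(double %x) #0 {
  %r = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") #0
  ret i32 %r
}

attributes #0 = { strictfp }
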
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
index 9de37a78c10..5327eccdcce 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-10.ll
@@ -19,7 +19,7 @@ define i32 @f1(float %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: le %f1, 0(%r1)
-; CHECK-NEXT: cebr %f0, %f1
+; CHECK-NEXT: kebr %f0, %f1
; CHECK-NEXT: jnl .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
@@ -43,7 +43,7 @@ define i32 @f2(double %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI1_0
; CHECK-NEXT: ldeb %f1, 0(%r1)
-; CHECK-NEXT: cdbr %f0, %f1
+; CHECK-NEXT: kdbr %f0, %f1
; CHECK-NEXT: jnl .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
@@ -69,7 +69,7 @@ define i32 @f3(fp128 *%src) #0 {
; CHECK-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: larl %r1, .LCPI2_0
; CHECK-NEXT: lxeb %f1, 0(%r1)
-; CHECK-NEXT: cxbr %f0, %f1
+; CHECK-NEXT: kxbr %f0, %f1
; CHECK-NEXT: jnl .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lhi %r0, 0
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
index f45902d8b3c..e7ed6af330c 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-conv-12.ll
@@ -18,7 +18,7 @@ define i64 @f1(float %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI0_0
; CHECK-NEXT: le %f1, 0(%r1)
-; CHECK-NEXT: cebr %f0, %f1
+; CHECK-NEXT: kebr %f0, %f1
; CHECK-NEXT: jnl .LBB0_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
@@ -42,7 +42,7 @@ define i64 @f2(double %f) #0 {
; CHECK: # %bb.0:
; CHECK-NEXT: larl %r1, .LCPI1_0
; CHECK-NEXT: ldeb %f1, 0(%r1)
-; CHECK-NEXT: cdbr %f0, %f1
+; CHECK-NEXT: kdbr %f0, %f1
; CHECK-NEXT: jnl .LBB1_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
@@ -68,7 +68,7 @@ define i64 @f3(fp128 *%src) #0 {
; CHECK-NEXT: ld %f2, 8(%r2)
; CHECK-NEXT: larl %r1, .LCPI2_0
; CHECK-NEXT: lxeb %f1, 0(%r1)
-; CHECK-NEXT: cxbr %f0, %f1
+; CHECK-NEXT: kxbr %f0, %f1
; CHECK-NEXT: jnl .LBB2_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: lghi %r0, 0
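
In both SystemZ files the compare emitted by the conversion expansion switches from the quiet forms (cebr/cdbr/cxbr) to the signaling forms (kebr/kdbr/kxbr): a quiet compare raises invalid only for signaling NaNs, while a signaling compare raises it for quiet NaNs as well, which is what an ordered strict comparison demands. At the IR level the same distinction is fcmp versus fcmps; a sketch with assumed operand names, not taken from these tests:

declare i1 @llvm.experimental.constrained.fcmp.f32(float, float, metadata, metadata)
declare i1 @llvm.experimental.constrained.fcmps.f32(float, float, metadata, metadata)

define i1 @quiet_vs_signaling(float %a, float %b) #0 {
  ; Quiet compare: invalid is raised only for signaling NaNs (cebr).
  %q = call i1 @llvm.experimental.constrained.fcmp.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0
  ; Signaling compare: invalid is raised for any NaN operand (kebr).
  %s = call i1 @llvm.experimental.constrained.fcmps.f32(float %a, float %b, metadata !"olt", metadata !"fpexcept.strict") #0
  %both = and i1 %q, %s
  ret i1 %both
}

attributes #0 = { strictfp }
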
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
index 623e63a93e4..5df3d8f7a4a 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics-flags.ll
@@ -29,13 +29,13 @@ entry:
; CHECK-LABEL: name: f20u64
; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: [[CMPSDrr:%[0-9]+]]:fr64 = CMPSDrr [[MOVSDrm_alt]], [[MOVSDrm_alt1]], 1, implicit $mxcsr
-; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[CMPSDrr]]
-; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[MOVSDrm_alt1]]
-; CHECK: [[PANDNrr:%[0-9]+]]:vr128 = PANDNrr [[COPY]], killed [[COPY1]]
-; CHECK: [[COPY2:%[0-9]+]]:fr64 = COPY [[PANDNrr]]
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[COPY2]], implicit $mxcsr
+; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
+; CHECK: JCC_1
+; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
; CHECK: MOVSDmr %stack.0, 1, $noreg, 0, $noreg, killed [[SUBSDrr]] :: (store 8 into %stack.0)
+; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[LD_Fp64m:%[0-9]+]]:rfp64 = LD_Fp64m %stack.0, 1, $noreg, 0, $noreg, implicit-def dead $fpsw, implicit $fpcw :: (load 8 from %stack.0)
; CHECK: FNSTCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit $fpcw :: (store 2 into %stack.1)
; CHECK: [[MOVZX32rm16_:%[0-9]+]]:gr32 = MOVZX32rm16 %stack.1, 1, $noreg, 0, $noreg :: (load 2 from %stack.1)
@@ -45,8 +45,6 @@ entry:
; CHECK: FLDCW16m %stack.2, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.2)
; CHECK: IST_Fp64m64 %stack.0, 1, $noreg, 0, $noreg, [[LD_Fp64m]], implicit-def $fpsw, implicit $fpcw
; CHECK: FLDCW16m %stack.1, 1, $noreg, 0, $noreg, implicit-def $fpsw, implicit-def $fpcw :: (load 2 from %stack.1)
-; CHECK: UCOMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
-; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags
; CHECK: [[XOR32rm:%[0-9]+]]:gr32 = XOR32rm [[SHL32ri]], %stack.0, 1, $noreg, 4, $noreg, implicit-def dead $eflags :: (load 4 from %stack.0 + 4)
@@ -86,16 +84,14 @@ entry:
; CHECK-LABEL: name: f20u
; CHECK: [[MOVSDrm_alt:%[0-9]+]]:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load 8 from %fixed-stack.0, align 16)
; CHECK: [[MOVSDrm_alt1:%[0-9]+]]:fr64 = MOVSDrm_alt $noreg, 1, $noreg, %const.0, $noreg :: (load 8 from constant-pool)
-; CHECK: UCOMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: COMISDrr [[MOVSDrm_alt1]], [[MOVSDrm_alt]], implicit-def $eflags, implicit $mxcsr
+; CHECK: [[FsFLD0SD:%[0-9]+]]:fr64 = FsFLD0SD
+; CHECK: JCC_1
+; CHECK: [[PHI:%[0-9]+]]:fr64 = PHI [[MOVSDrm_alt1]], {{.*}}, [[FsFLD0SD]], {{.*}}
; CHECK: [[SETCCr:%[0-9]+]]:gr8 = SETCCr 6, implicit $eflags
; CHECK: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 killed [[SETCCr]]
; CHECK: [[SHL32ri:%[0-9]+]]:gr32 = SHL32ri [[MOVZX32rr8_]], 31, implicit-def dead $eflags
-; CHECK: [[CMPSDrr:%[0-9]+]]:fr64 = CMPSDrr [[MOVSDrm_alt]], [[MOVSDrm_alt1]], 1, implicit $mxcsr
-; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY [[CMPSDrr]]
-; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[MOVSDrm_alt1]]
-; CHECK: [[PANDNrr:%[0-9]+]]:vr128 = PANDNrr [[COPY]], killed [[COPY1]]
-; CHECK: [[COPY2:%[0-9]+]]:fr64 = COPY [[PANDNrr]]
-; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[COPY2]], implicit $mxcsr
+; CHECK: [[SUBSDrr:%[0-9]+]]:fr64 = SUBSDrr [[MOVSDrm_alt]], killed [[PHI]], implicit $mxcsr
; CHECK: [[CVTTSD2SIrr:%[0-9]+]]:gr32 = CVTTSD2SIrr killed [[SUBSDrr]], implicit $mxcsr
; CHECK: [[XOR32rr:%[0-9]+]]:gr32 = XOR32rr [[CVTTSD2SIrr]], killed [[SHL32ri]], implicit-def dead $eflags
; CHECK: $eax = COPY [[XOR32rr]]
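
The MIR checks above show the select feeding the subtraction now lowered as a compare-and-branch (COMISDrr, JCC_1, PHI) instead of a CMPSDrr/PANDNrr mask, with a single signaling COMISD also feeding the SETCC that computes the sign-bit correction. The scalar expansion being lowered is, roughly, the standard 2^63 split; a hand-written sketch in non-strict syntax, not lifted from the test:

; fptoui double -> i64 via signed conversion: inputs below 2^63 convert
; directly; larger inputs have 2^63 subtracted first and the top bit
; restored with an xor afterwards.
define i64 @fptoui_sketch(double %x) {
  %small  = fcmp olt double %x, 0x43E0000000000000      ; 2^63
  %adj    = select i1 %small, double 0.0, double 0x43E0000000000000
  %diff   = fsub double %x, %adj
  %sint   = fptosi double %diff to i64
  %topbit = select i1 %small, i64 0, i64 -9223372036854775808
  %res    = xor i64 %sint, %topbit
  ret i64 %res
}
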
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
index 124a43f53dd..2135cdb0404 100644
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -1277,15 +1277,17 @@ define i32 @f20u(double %x) #0 {
; X86-SSE-LABEL: f20u:
; X86-SSE: # %bb.0: # %entry
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE-NEXT: xorl %ecx, %ecx
-; X86-SSE-NEXT: ucomisd %xmm0, %xmm1
-; X86-SSE-NEXT: setbe %cl
+; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE-NEXT: comisd %xmm0, %xmm2
+; X86-SSE-NEXT: xorpd %xmm1, %xmm1
+; X86-SSE-NEXT: ja .LBB24_2
+; X86-SSE-NEXT: # %bb.1: # %entry
+; X86-SSE-NEXT: movapd %xmm2, %xmm1
+; X86-SSE-NEXT: .LBB24_2: # %entry
+; X86-SSE-NEXT: setbe %al
+; X86-SSE-NEXT: movzbl %al, %ecx
; X86-SSE-NEXT: shll $31, %ecx
-; X86-SSE-NEXT: movapd %xmm0, %xmm2
-; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2
-; X86-SSE-NEXT: andnpd %xmm1, %xmm2
-; X86-SSE-NEXT: subsd %xmm2, %xmm0
+; X86-SSE-NEXT: subsd %xmm1, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %eax
; X86-SSE-NEXT: xorl %ecx, %eax
; X86-SSE-NEXT: retl
@@ -1324,7 +1326,7 @@ define i64 @f20u64(double %x) #0 {
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: flds {{\.LCPI.*}}
; X87-NEXT: xorl %edx, %edx
-; X87-NEXT: fucomi %st(1), %st
+; X87-NEXT: fcomi %st(1), %st
; X87-NEXT: setbe %dl
; X87-NEXT: fldz
; X87-NEXT: fxch %st(1)
@@ -1350,24 +1352,25 @@ define i64 @f20u64(double %x) #0 {
; X86-SSE-NEXT: subl $20, %esp
; X86-SSE-NEXT: .cfi_def_cfa_offset 24
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; X86-SSE-NEXT: movapd %xmm0, %xmm2
-; X86-SSE-NEXT: cmpltsd %xmm1, %xmm2
-; X86-SSE-NEXT: andnpd %xmm1, %xmm2
-; X86-SSE-NEXT: movapd %xmm0, %xmm3
-; X86-SSE-NEXT: subsd %xmm2, %xmm3
-; X86-SSE-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; X86-SSE-NEXT: comisd %xmm0, %xmm2
+; X86-SSE-NEXT: xorpd %xmm1, %xmm1
+; X86-SSE-NEXT: ja .LBB25_2
+; X86-SSE-NEXT: # %bb.1: # %entry
+; X86-SSE-NEXT: movapd %xmm2, %xmm1
+; X86-SSE-NEXT: .LBB25_2: # %entry
+; X86-SSE-NEXT: subsd %xmm1, %xmm0
+; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: setbe %al
; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00
-; X86-SSE-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00
+; X86-SSE-NEXT: movw %cx, {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fistpll {{[0-9]+}}(%esp)
; X86-SSE-NEXT: fldcw {{[0-9]+}}(%esp)
-; X86-SSE-NEXT: xorl %edx, %edx
-; X86-SSE-NEXT: ucomisd %xmm0, %xmm1
-; X86-SSE-NEXT: setbe %dl
+; X86-SSE-NEXT: movzbl %al, %edx
; X86-SSE-NEXT: shll $31, %edx
; X86-SSE-NEXT: xorl {{[0-9]+}}(%esp), %edx
; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1377,30 +1380,35 @@ define i64 @f20u64(double %x) #0 {
;
; SSE-LABEL: f20u64:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: xorl %ecx, %ecx
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setae %cl
-; SSE-NEXT: shlq $63, %rcx
-; SSE-NEXT: movapd %xmm0, %xmm2
-; SSE-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-NEXT: andnpd %xmm1, %xmm2
-; SSE-NEXT: subsd %xmm2, %xmm0
-; SSE-NEXT: cvttsd2si %xmm0, %rax
+; SSE-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-NEXT: comisd %xmm2, %xmm0
+; SSE-NEXT: xorpd %xmm1, %xmm1
+; SSE-NEXT: jb .LBB25_2
+; SSE-NEXT: # %bb.1: # %entry
+; SSE-NEXT: movapd %xmm2, %xmm1
+; SSE-NEXT: .LBB25_2: # %entry
+; SSE-NEXT: subsd %xmm1, %xmm0
+; SSE-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-NEXT: setae %al
+; SSE-NEXT: movzbl %al, %eax
+; SSE-NEXT: shlq $63, %rax
; SSE-NEXT: xorq %rcx, %rax
; SSE-NEXT: retq
;
; AVX1-LABEL: f20u64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: xorl %ecx, %ecx
-; AVX1-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-NEXT: setae %cl
-; AVX1-NEXT: shlq $63, %rcx
-; AVX1-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-NEXT: vandnpd %xmm1, %xmm2, %xmm1
-; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-NEXT: vcomisd %xmm1, %xmm0
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: jb .LBB25_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-NEXT: .LBB25_2: # %entry
+; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-NEXT: setae %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
@@ -2656,34 +2664,34 @@ define float @uiffl(i64 %x) #0 {
;
; SSE-LABEL: uiffl:
; SSE: # %bb.0: # %entry
-; SSE-NEXT: testq %rdi, %rdi
-; SSE-NEXT: js .LBB52_1
-; SSE-NEXT: # %bb.2: # %entry
-; SSE-NEXT: cvtsi2ss %rdi, %xmm0
-; SSE-NEXT: retq
-; SSE-NEXT: .LBB52_1:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: shrq %rax
-; SSE-NEXT: andl $1, %edi
-; SSE-NEXT: orq %rax, %rdi
-; SSE-NEXT: cvtsi2ss %rdi, %xmm0
+; SSE-NEXT: movl %edi, %ecx
+; SSE-NEXT: andl $1, %ecx
+; SSE-NEXT: orq %rax, %rcx
+; SSE-NEXT: testq %rdi, %rdi
+; SSE-NEXT: cmovnsq %rdi, %rcx
+; SSE-NEXT: cvtsi2ss %rcx, %xmm0
+; SSE-NEXT: jns .LBB52_2
+; SSE-NEXT: # %bb.1:
; SSE-NEXT: addss %xmm0, %xmm0
+; SSE-NEXT: .LBB52_2: # %entry
; SSE-NEXT: retq
;
; AVX1-LABEL: uiffl:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: testq %rdi, %rdi
-; AVX1-NEXT: js .LBB52_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB52_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
-; AVX1-NEXT: andl $1, %edi
-; AVX1-NEXT: orq %rax, %rdi
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: movl %edi, %ecx
+; AVX1-NEXT: andl $1, %ecx
+; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: cmovnsq %rdi, %rcx
+; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
+; AVX1-NEXT: jns .LBB52_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB52_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: uiffl:
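
The uiffl checks cover unsigned i64 -> float: an input with the top bit set is halved with round-to-odd before the signed convert and the result doubled afterwards, and the new code performs the selection with cmovns plus one conditional add instead of an early branch. As plain IR the expansion is roughly the following (a sketch; the test itself calls the constrained uitofp intrinsic):

define float @uitofp_sketch(i64 %x) {
  %neg  = icmp slt i64 %x, 0          ; top bit set: value >= 2^63
  %half = lshr i64 %x, 1
  %low  = and i64 %x, 1               ; keep the dropped bit (round to odd)
  %odd  = or i64 %half, %low
  %src  = select i1 %neg, i64 %odd, i64 %x
  %cvt  = sitofp i64 %src to float
  %dbl  = fadd float %cvt, %cvt       ; undo the halving
  %res  = select i1 %neg, float %dbl, float %cvt
  ret float %res
}
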
diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
index 680acf98d2a..dc54f15a650 100644
--- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
+++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll
@@ -437,15 +437,17 @@ define i32 @fptoui_f32toi32(float %x) #0 {
; SSE-X86-LABEL: fptoui_f32toi32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: xorl %ecx, %ecx
-; SSE-X86-NEXT: ucomiss %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %cl
+; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: comiss %xmm0, %xmm2
+; SSE-X86-NEXT: xorps %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB8_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movaps %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB8_2:
+; SSE-X86-NEXT: setbe %al
+; SSE-X86-NEXT: movzbl %al, %ecx
; SSE-X86-NEXT: shll $31, %ecx
-; SSE-X86-NEXT: movaps %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X86-NEXT: andnps %xmm1, %xmm2
-; SSE-X86-NEXT: subss %xmm2, %xmm0
+; SSE-X86-NEXT: subss %xmm1, %xmm0
; SSE-X86-NEXT: cvttss2si %xmm0, %eax
; SSE-X86-NEXT: xorl %ecx, %eax
; SSE-X86-NEXT: retl
@@ -529,24 +531,25 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $16, %esp
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X86-NEXT: movaps %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X86-NEXT: andnps %xmm1, %xmm2
-; SSE-X86-NEXT: movaps %xmm0, %xmm3
-; SSE-X86-NEXT: subss %xmm2, %xmm3
-; SSE-X86-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X86-NEXT: comiss %xmm0, %xmm2
+; SSE-X86-NEXT: xorps %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB9_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movaps %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB9_2:
+; SSE-X86-NEXT: subss %xmm1, %xmm0
+; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: flds {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: xorl %edx, %edx
-; SSE-X86-NEXT: ucomiss %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %dl
+; SSE-X86-NEXT: movzbl %al, %edx
; SSE-X86-NEXT: shll $31, %edx
; SSE-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -557,16 +560,18 @@ define i64 @fptoui_f32toi64(float %x) #0 {
;
; SSE-X64-LABEL: fptoui_f32toi64:
; SSE-X64: # %bb.0:
-; SSE-X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-X64-NEXT: xorl %ecx, %ecx
-; SSE-X64-NEXT: ucomiss %xmm1, %xmm0
-; SSE-X64-NEXT: setae %cl
-; SSE-X64-NEXT: shlq $63, %rcx
-; SSE-X64-NEXT: movaps %xmm0, %xmm2
-; SSE-X64-NEXT: cmpltss %xmm1, %xmm2
-; SSE-X64-NEXT: andnps %xmm1, %xmm2
-; SSE-X64-NEXT: subss %xmm2, %xmm0
-; SSE-X64-NEXT: cvttss2si %xmm0, %rax
+; SSE-X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-X64-NEXT: comiss %xmm2, %xmm0
+; SSE-X64-NEXT: xorps %xmm1, %xmm1
+; SSE-X64-NEXT: jb .LBB9_2
+; SSE-X64-NEXT: # %bb.1:
+; SSE-X64-NEXT: movaps %xmm2, %xmm1
+; SSE-X64-NEXT: .LBB9_2:
+; SSE-X64-NEXT: subss %xmm1, %xmm0
+; SSE-X64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-X64-NEXT: setae %al
+; SSE-X64-NEXT: movzbl %al, %eax
+; SSE-X64-NEXT: shlq $63, %rax
; SSE-X64-NEXT: xorq %rcx, %rax
; SSE-X64-NEXT: retq
;
@@ -581,15 +586,18 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX1-X86-NEXT: subl $8, %esp
; AVX1-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X86-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX1-X86-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX1-X86-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX1-X86-NEXT: vmovss %xmm2, (%esp)
+; AVX1-X86-NEXT: vcomiss %xmm0, %xmm1
+; AVX1-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-X86-NEXT: ja .LBB9_2
+; AVX1-X86-NEXT: # %bb.1:
+; AVX1-X86-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-X86-NEXT: .LBB9_2:
+; AVX1-X86-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-X86-NEXT: vmovss %xmm0, (%esp)
; AVX1-X86-NEXT: flds (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
-; AVX1-X86-NEXT: xorl %edx, %edx
-; AVX1-X86-NEXT: vucomiss %xmm0, %xmm1
-; AVX1-X86-NEXT: setbe %dl
+; AVX1-X86-NEXT: setbe %al
+; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
; AVX1-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX1-X86-NEXT: movl (%esp), %eax
@@ -601,14 +609,17 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX1-X64-LABEL: fptoui_f32toi64:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-X64-NEXT: xorl %ecx, %ecx
-; AVX1-X64-NEXT: vucomiss %xmm1, %xmm0
-; AVX1-X64-NEXT: setae %cl
-; AVX1-X64-NEXT: shlq $63, %rcx
-; AVX1-X64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX1-X64-NEXT: vandnps %xmm1, %xmm2, %xmm1
-; AVX1-X64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX1-X64-NEXT: vcvttss2si %xmm0, %rax
+; AVX1-X64-NEXT: vcomiss %xmm1, %xmm0
+; AVX1-X64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-X64-NEXT: jb .LBB9_2
+; AVX1-X64-NEXT: # %bb.1:
+; AVX1-X64-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-X64-NEXT: .LBB9_2:
+; AVX1-X64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-X64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX1-X64-NEXT: setae %al
+; AVX1-X64-NEXT: movzbl %al, %eax
+; AVX1-X64-NEXT: shlq $63, %rax
; AVX1-X64-NEXT: xorq %rcx, %rax
; AVX1-X64-NEXT: retq
;
@@ -623,10 +634,11 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; AVX512-X86-NEXT: subl $8, %esp
; AVX512-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX512-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512-X86-NEXT: vcmpltss %xmm1, %xmm0, %k1
-; AVX512-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: xorl %edx, %edx
-; AVX512-X86-NEXT: vucomiss %xmm0, %xmm1
+; AVX512-X86-NEXT: vcomiss %xmm0, %xmm1
+; AVX512-X86-NEXT: seta %al
+; AVX512-X86-NEXT: kmovw %eax, %k1
+; AVX512-X86-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-X86-NEXT: vsubss %xmm1, %xmm0, %xmm0
; AVX512-X86-NEXT: vmovss %xmm0, (%esp)
@@ -657,7 +669,7 @@ define i64 @fptoui_f32toi64(float %x) #0 {
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: flds 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
-; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fcom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
@@ -1054,15 +1066,17 @@ define i32 @fptoui_f64toi32(double %x) #0 {
; SSE-X86-LABEL: fptoui_f64toi32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X86-NEXT: xorl %ecx, %ecx
-; SSE-X86-NEXT: ucomisd %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %cl
+; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X86-NEXT: comisd %xmm0, %xmm2
+; SSE-X86-NEXT: xorpd %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB17_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movapd %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB17_2:
+; SSE-X86-NEXT: setbe %al
+; SSE-X86-NEXT: movzbl %al, %ecx
; SSE-X86-NEXT: shll $31, %ecx
-; SSE-X86-NEXT: movapd %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X86-NEXT: andnpd %xmm1, %xmm2
-; SSE-X86-NEXT: subsd %xmm2, %xmm0
+; SSE-X86-NEXT: subsd %xmm1, %xmm0
; SSE-X86-NEXT: cvttsd2si %xmm0, %eax
; SSE-X86-NEXT: xorl %ecx, %eax
; SSE-X86-NEXT: retl
@@ -1146,24 +1160,25 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; SSE-X86-NEXT: andl $-8, %esp
; SSE-X86-NEXT: subl $16, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X86-NEXT: movapd %xmm0, %xmm2
-; SSE-X86-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X86-NEXT: andnpd %xmm1, %xmm2
-; SSE-X86-NEXT: movapd %xmm0, %xmm3
-; SSE-X86-NEXT: subsd %xmm2, %xmm3
-; SSE-X86-NEXT: movsd %xmm3, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X86-NEXT: comisd %xmm0, %xmm2
+; SSE-X86-NEXT: xorpd %xmm1, %xmm1
+; SSE-X86-NEXT: ja .LBB18_2
+; SSE-X86-NEXT: # %bb.1:
+; SSE-X86-NEXT: movapd %xmm2, %xmm1
+; SSE-X86-NEXT: .LBB18_2:
+; SSE-X86-NEXT: subsd %xmm1, %xmm0
+; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: setbe %al
; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-X86-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-X86-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldcw {{[0-9]+}}(%esp)
-; SSE-X86-NEXT: xorl %edx, %edx
-; SSE-X86-NEXT: ucomisd %xmm0, %xmm1
-; SSE-X86-NEXT: setbe %dl
+; SSE-X86-NEXT: movzbl %al, %edx
; SSE-X86-NEXT: shll $31, %edx
; SSE-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; SSE-X86-NEXT: movl {{[0-9]+}}(%esp), %eax
@@ -1174,16 +1189,18 @@ define i64 @fptoui_f64toi64(double %x) #0 {
;
; SSE-X64-LABEL: fptoui_f64toi64:
; SSE-X64: # %bb.0:
-; SSE-X64-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-X64-NEXT: xorl %ecx, %ecx
-; SSE-X64-NEXT: ucomisd %xmm1, %xmm0
-; SSE-X64-NEXT: setae %cl
-; SSE-X64-NEXT: shlq $63, %rcx
-; SSE-X64-NEXT: movapd %xmm0, %xmm2
-; SSE-X64-NEXT: cmpltsd %xmm1, %xmm2
-; SSE-X64-NEXT: andnpd %xmm1, %xmm2
-; SSE-X64-NEXT: subsd %xmm2, %xmm0
-; SSE-X64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-X64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; SSE-X64-NEXT: comisd %xmm2, %xmm0
+; SSE-X64-NEXT: xorpd %xmm1, %xmm1
+; SSE-X64-NEXT: jb .LBB18_2
+; SSE-X64-NEXT: # %bb.1:
+; SSE-X64-NEXT: movapd %xmm2, %xmm1
+; SSE-X64-NEXT: .LBB18_2:
+; SSE-X64-NEXT: subsd %xmm1, %xmm0
+; SSE-X64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-X64-NEXT: setae %al
+; SSE-X64-NEXT: movzbl %al, %eax
+; SSE-X64-NEXT: shlq $63, %rax
; SSE-X64-NEXT: xorq %rcx, %rax
; SSE-X64-NEXT: retq
;
@@ -1198,15 +1215,18 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX1-X86-NEXT: subl $8, %esp
; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-X86-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-X86-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX1-X86-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX1-X86-NEXT: vmovsd %xmm2, (%esp)
+; AVX1-X86-NEXT: vcomisd %xmm0, %xmm1
+; AVX1-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-X86-NEXT: ja .LBB18_2
+; AVX1-X86-NEXT: # %bb.1:
+; AVX1-X86-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-X86-NEXT: .LBB18_2:
+; AVX1-X86-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-X86-NEXT: vmovsd %xmm0, (%esp)
; AVX1-X86-NEXT: fldl (%esp)
; AVX1-X86-NEXT: fisttpll (%esp)
-; AVX1-X86-NEXT: xorl %edx, %edx
-; AVX1-X86-NEXT: vucomisd %xmm0, %xmm1
-; AVX1-X86-NEXT: setbe %dl
+; AVX1-X86-NEXT: setbe %al
+; AVX1-X86-NEXT: movzbl %al, %edx
; AVX1-X86-NEXT: shll $31, %edx
; AVX1-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX1-X86-NEXT: movl (%esp), %eax
@@ -1218,14 +1238,17 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX1-X64-LABEL: fptoui_f64toi64:
; AVX1-X64: # %bb.0:
; AVX1-X64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-X64-NEXT: xorl %ecx, %ecx
-; AVX1-X64-NEXT: vucomisd %xmm1, %xmm0
-; AVX1-X64-NEXT: setae %cl
-; AVX1-X64-NEXT: shlq $63, %rcx
-; AVX1-X64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX1-X64-NEXT: vandnpd %xmm1, %xmm2, %xmm1
-; AVX1-X64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX1-X64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-X64-NEXT: vcomisd %xmm1, %xmm0
+; AVX1-X64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-X64-NEXT: jb .LBB18_2
+; AVX1-X64-NEXT: # %bb.1:
+; AVX1-X64-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-X64-NEXT: .LBB18_2:
+; AVX1-X64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-X64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-X64-NEXT: setae %al
+; AVX1-X64-NEXT: movzbl %al, %eax
+; AVX1-X64-NEXT: shlq $63, %rax
; AVX1-X64-NEXT: xorq %rcx, %rax
; AVX1-X64-NEXT: retq
;
@@ -1240,10 +1263,11 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; AVX512-X86-NEXT: subl $8, %esp
; AVX512-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX512-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512-X86-NEXT: vcmpltsd %xmm1, %xmm0, %k1
-; AVX512-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: xorl %edx, %edx
-; AVX512-X86-NEXT: vucomisd %xmm0, %xmm1
+; AVX512-X86-NEXT: vcomisd %xmm0, %xmm1
+; AVX512-X86-NEXT: seta %al
+; AVX512-X86-NEXT: kmovw %eax, %k1
+; AVX512-X86-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX512-X86-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
; AVX512-X86-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX512-X86-NEXT: vmovsd %xmm0, (%esp)
@@ -1274,7 +1298,7 @@ define i64 @fptoui_f64toi64(double %x) #0 {
; CHECK-NEXT: subl $16, %esp
; CHECK-NEXT: fldl 8(%ebp)
; CHECK-NEXT: flds {{\.LCPI.*}}
-; CHECK-NEXT: fucom %st(1)
+; CHECK-NEXT: fcom %st(1)
; CHECK-NEXT: fnstsw %ax
; CHECK-NEXT: xorl %edx, %edx
; CHECK-NEXT: # kill: def $ah killed $ah killed $ax
diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
index 51ffc1c48ee..0df9f33fb07 100644
--- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
+++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll
@@ -543,7 +543,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X86-NEXT: subl $16, %esp
; X86-NEXT: fldt 8(%ebp)
; X86-NEXT: flds {{\.LCPI.*}}
-; X86-NEXT: fucom %st(1)
+; X86-NEXT: fcom %st(1)
; X86-NEXT: fnstsw %ax
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: # kill: def $ah killed $ah killed $ax
@@ -579,7 +579,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 {
; X64-NEXT: fldt {{[0-9]+}}(%rsp)
; X64-NEXT: flds {{.*}}(%rip)
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: fucomi %st(1), %st
+; X64-NEXT: fcomi %st(1), %st
; X64-NEXT: setbe %al
; X64-NEXT: fldz
; X64-NEXT: fxch %st(1)
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
index fdefd937e7e..ab5c6b7f998 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll
@@ -176,7 +176,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
; SSE-32-NEXT: jb .LBB1_2
@@ -196,7 +196,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: jb .LBB1_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
@@ -232,29 +232,33 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm1
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-64-NEXT: andnpd %xmm2, %xmm1
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: subsd %xmm1, %xmm3
-; SSE-64-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: xorpd %xmm2, %xmm2
+; SSE-64-NEXT: xorpd %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB1_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movapd %xmm3, %xmm1
+; SSE-64-NEXT: .LBB1_2:
+; SSE-64-NEXT: movapd %xmm0, %xmm4
+; SSE-64-NEXT: subsd %xmm1, %xmm4
+; SSE-64-NEXT: cvttsd2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-64-NEXT: andnpd %xmm2, %xmm3
-; SSE-64-NEXT: subsd %xmm3, %xmm0
-; SSE-64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB1_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movapd %xmm3, %xmm2
+; SSE-64-NEXT: .LBB1_4:
+; SSE-64-NEXT: subsd %xmm2, %xmm0
+; SSE-64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -272,7 +276,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: subl $16, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_2
@@ -287,7 +291,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB1_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -312,28 +316,34 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB1_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm3
+; AVX-64-NEXT: .LBB1_2:
+; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB1_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB1_4:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f64_to_v2i64:
@@ -348,7 +358,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomisd %xmm2, %xmm1
+; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm1
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
@@ -362,7 +372,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm2, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm2, %xmm0
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm2, %xmm2 {%k1}
@@ -559,48 +569,50 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
-; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: cmpltss %xmm1, %xmm2
-; SSE-32-NEXT: andnps %xmm1, %xmm2
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm2, %xmm3
-; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB3_2
+; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: cmpltss %xmm1, %xmm3
-; SSE-32-NEXT: andnps %xmm1, %xmm3
-; SSE-32-NEXT: movaps %xmm2, %xmm4
+; SSE-32-NEXT: .LBB3_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB3_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB3_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw (%esp)
-; SSE-32-NEXT: movzwl (%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw (%esp)
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm0
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: movd %eax, %xmm3
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm2
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: movd %eax, %xmm1
@@ -614,29 +626,33 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm1
-; SSE-64-NEXT: cmpltss %xmm2, %xmm1
-; SSE-64-NEXT: andnps %xmm2, %xmm1
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: subss %xmm1, %xmm3
-; SSE-64-NEXT: cvttss2si %xmm3, %rcx
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB3_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm1
+; SSE-64-NEXT: .LBB3_2:
+; SSE-64-NEXT: movaps %xmm0, %xmm4
+; SSE-64-NEXT: subss %xmm1, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: cmpltss %xmm2, %xmm3
-; SSE-64-NEXT: andnps %xmm2, %xmm3
-; SSE-64-NEXT: subss %xmm3, %xmm0
-; SSE-64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB3_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB3_4:
+; SSE-64-NEXT: subss %xmm2, %xmm0
+; SSE-64-NEXT: cvttss2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -652,28 +668,34 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $16, %esp
-; AVX-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm1, %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB3_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB3_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-32-NEXT: vmovss %xmm3, (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX-32-NEXT: flds (%esp)
-; AVX-32-NEXT: fisttpll (%esp)
-; AVX-32-NEXT: xorl %eax, %eax
-; AVX-32-NEXT: vucomiss %xmm2, %xmm1
; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: xorl %ecx, %ecx
-; AVX-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB3_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB3_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -688,28 +710,34 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm3
+; AVX-64-NEXT: .LBB3_2:
+; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
@@ -723,28 +751,29 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
; AVX512VL-32-NEXT: subl $16, %esp
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vcmpltss %xmm2, %xmm1, %k1
+; AVX512VL-32-NEXT: xorl %eax, %eax
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm1
+; AVX512VL-32-NEXT: setb %cl
+; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
-; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm4
-; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: vcmpltss %xmm2, %xmm0, %k1
-; AVX512VL-32-NEXT: vmovaps %xmm2, %xmm4
-; AVX512VL-32-NEXT: vmovss %xmm3, %xmm4, %xmm4 {%k1}
-; AVX512VL-32-NEXT: vsubss %xmm4, %xmm0, %xmm3
-; AVX512VL-32-NEXT: vmovss %xmm3, (%esp)
+; AVX512VL-32-NEXT: vsubss %xmm4, %xmm1, %xmm1
+; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX512VL-32-NEXT: flds (%esp)
-; AVX512VL-32-NEXT: fisttpll (%esp)
-; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomiss %xmm2, %xmm1
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm2, %xmm0
+; AVX512VL-32-NEXT: setb %dl
+; AVX512VL-32-NEXT: kmovw %edx, %k1
+; AVX512VL-32-NEXT: vmovss %xmm3, %xmm2, %xmm2 {%k1}
+; AVX512VL-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX512VL-32-NEXT: vmovss %xmm0, (%esp)
+; AVX512VL-32-NEXT: flds (%esp)
+; AVX512VL-32-NEXT: fisttpll (%esp)
; AVX512VL-32-NEXT: setae %cl
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
@@ -828,28 +857,32 @@ define <2 x i32> @strict_vector_fptosi_v2f64_to_v2i32(<2 x double> %a) #0 {
define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i32:
; SSE-32: # %bb.0:
-; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-32-NEXT: comisd %xmm3, %xmm0
+; SSE-32-NEXT: xorpd %xmm2, %xmm2
+; SSE-32-NEXT: xorpd %xmm1, %xmm1
+; SSE-32-NEXT: jb .LBB5_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movapd %xmm3, %xmm1
+; SSE-32-NEXT: .LBB5_2:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movapd %xmm0, %xmm1
-; SSE-32-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-32-NEXT: andnpd %xmm2, %xmm1
-; SSE-32-NEXT: movapd %xmm0, %xmm3
-; SSE-32-NEXT: subsd %xmm1, %xmm3
-; SSE-32-NEXT: cvttsd2si %xmm3, %ecx
+; SSE-32-NEXT: movapd %xmm0, %xmm4
+; SSE-32-NEXT: subsd %xmm1, %xmm4
+; SSE-32-NEXT: cvttsd2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB5_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movapd %xmm3, %xmm2
+; SSE-32-NEXT: .LBB5_4:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movapd %xmm0, %xmm3
-; SSE-32-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-32-NEXT: andnpd %xmm2, %xmm3
-; SSE-32-NEXT: subsd %xmm3, %xmm0
+; SSE-32-NEXT: subsd %xmm2, %xmm0
; SSE-32-NEXT: cvttsd2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
@@ -978,28 +1011,32 @@ define <2 x i32> @strict_vector_fptosi_v2f32_to_v2i32(<2 x float> %a) #0 {
define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 {
; SSE-32-LABEL: strict_vector_fptoui_v2f32_to_v2i32:
; SSE-32: # %bb.0:
-; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: xorps %xmm2, %xmm2
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: jb .LBB7_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm3, %xmm1
+; SSE-32-NEXT: .LBB7_2:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm1
-; SSE-32-NEXT: cmpltss %xmm2, %xmm1
-; SSE-32-NEXT: andnps %xmm2, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm1, %xmm3
-; SSE-32-NEXT: cvttss2si %xmm3, %ecx
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: subss %xmm1, %xmm4
+; SSE-32-NEXT: cvttss2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB7_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm3, %xmm2
+; SSE-32-NEXT: .LBB7_4:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: cmpltss %xmm2, %xmm3
-; SSE-32-NEXT: andnps %xmm2, %xmm3
-; SSE-32-NEXT: subss %xmm3, %xmm0
+; SSE-32-NEXT: subss %xmm2, %xmm0
; SSE-32-NEXT: cvttss2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
@@ -1542,7 +1579,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
; SSE-32-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: xorpd %xmm1, %xmm1
; SSE-32-NEXT: xorpd %xmm3, %xmm3
; SSE-32-NEXT: jb .LBB17_2
@@ -1562,7 +1599,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-32-NEXT: ucomisd %xmm2, %xmm0
+; SSE-32-NEXT: comisd %xmm2, %xmm0
; SSE-32-NEXT: jb .LBB17_4
; SSE-32-NEXT: # %bb.3:
; SSE-32-NEXT: movapd %xmm2, %xmm1
@@ -1598,29 +1635,33 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm1
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm1
-; SSE-64-NEXT: andnpd %xmm2, %xmm1
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: subsd %xmm1, %xmm3
-; SSE-64-NEXT: cvttsd2si %xmm3, %rcx
+; SSE-64-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: xorpd %xmm2, %xmm2
+; SSE-64-NEXT: xorpd %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB17_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movapd %xmm3, %xmm1
+; SSE-64-NEXT: .LBB17_2:
+; SSE-64-NEXT: movapd %xmm0, %xmm4
+; SSE-64-NEXT: subsd %xmm1, %xmm4
+; SSE-64-NEXT: cvttsd2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomisd %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movapd %xmm0, %xmm3
-; SSE-64-NEXT: cmpltsd %xmm2, %xmm3
-; SSE-64-NEXT: andnpd %xmm2, %xmm3
-; SSE-64-NEXT: subsd %xmm3, %xmm0
-; SSE-64-NEXT: cvttsd2si %xmm0, %rcx
+; SSE-64-NEXT: comisd %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB17_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movapd %xmm3, %xmm2
+; SSE-64-NEXT: .LBB17_4:
+; SSE-64-NEXT: subsd %xmm2, %xmm0
+; SSE-64-NEXT: cvttsd2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -1638,7 +1679,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: subl $16, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB17_2
@@ -1653,7 +1694,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB17_4
; AVX-32-NEXT: # %bb.3:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -1678,28 +1719,34 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnpd %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB17_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm3
+; AVX-64-NEXT: .LBB17_2:
+; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttsd2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB17_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB17_4:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i1:
@@ -1863,48 +1910,50 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; SSE-32-NEXT: .cfi_def_cfa_register %ebp
; SSE-32-NEXT: andl $-8, %esp
; SSE-32-NEXT: subl $24, %esp
-; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: cmpltss %xmm1, %xmm2
-; SSE-32-NEXT: andnps %xmm1, %xmm2
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: subss %xmm2, %xmm3
-; SSE-32-NEXT: movss %xmm3, {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movaps %xmm0, %xmm2
-; SSE-32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
+; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: xorps %xmm1, %xmm1
+; SSE-32-NEXT: xorps %xmm3, %xmm3
+; SSE-32-NEXT: jb .LBB19_2
+; SSE-32-NEXT: # %bb.1:
; SSE-32-NEXT: movaps %xmm2, %xmm3
-; SSE-32-NEXT: cmpltss %xmm1, %xmm3
-; SSE-32-NEXT: andnps %xmm1, %xmm3
-; SSE-32-NEXT: movaps %xmm2, %xmm4
+; SSE-32-NEXT: .LBB19_2:
+; SSE-32-NEXT: movaps %xmm0, %xmm4
; SSE-32-NEXT: subss %xmm3, %xmm4
; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %al
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp)
-; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00
+; SSE-32-NEXT: movw %cx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
+; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE-32-NEXT: comiss %xmm2, %xmm0
+; SSE-32-NEXT: jb .LBB19_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm2, %xmm1
+; SSE-32-NEXT: .LBB19_4:
+; SSE-32-NEXT: subss %xmm1, %xmm0
+; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: setae %cl
; SSE-32-NEXT: flds {{[0-9]+}}(%esp)
; SSE-32-NEXT: fnstcw (%esp)
-; SSE-32-NEXT: movzwl (%esp), %eax
-; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00
-; SSE-32-NEXT: movw %ax, {{[0-9]+}}(%esp)
+; SSE-32-NEXT: movzwl (%esp), %edx
+; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00
+; SSE-32-NEXT: movw %dx, {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp)
; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp)
; SSE-32-NEXT: fldcw (%esp)
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm0
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; SSE-32-NEXT: movd %eax, %xmm3
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm1, %xmm2
-; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-32-NEXT: movzbl %cl, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; SSE-32-NEXT: movd %eax, %xmm1
@@ -1918,29 +1967,33 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
;
; SSE-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; SSE-64: # %bb.0:
-; SSE-64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm1
-; SSE-64-NEXT: cmpltss %xmm2, %xmm1
-; SSE-64-NEXT: andnps %xmm2, %xmm1
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: subss %xmm1, %xmm3
-; SSE-64-NEXT: cvttss2si %xmm3, %rcx
+; SSE-64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: xorps %xmm2, %xmm2
+; SSE-64-NEXT: xorps %xmm1, %xmm1
+; SSE-64-NEXT: jb .LBB19_2
+; SSE-64-NEXT: # %bb.1:
+; SSE-64-NEXT: movaps %xmm3, %xmm1
+; SSE-64-NEXT: .LBB19_2:
+; SSE-64-NEXT: movaps %xmm0, %xmm4
+; SSE-64-NEXT: subss %xmm1, %xmm4
+; SSE-64-NEXT: cvttss2si %xmm4, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm1
; SSE-64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-64-NEXT: xorl %eax, %eax
-; SSE-64-NEXT: ucomiss %xmm2, %xmm0
-; SSE-64-NEXT: setae %al
-; SSE-64-NEXT: shlq $63, %rax
-; SSE-64-NEXT: movaps %xmm0, %xmm3
-; SSE-64-NEXT: cmpltss %xmm2, %xmm3
-; SSE-64-NEXT: andnps %xmm2, %xmm3
-; SSE-64-NEXT: subss %xmm3, %xmm0
-; SSE-64-NEXT: cvttss2si %xmm0, %rcx
+; SSE-64-NEXT: comiss %xmm3, %xmm0
+; SSE-64-NEXT: jb .LBB19_4
+; SSE-64-NEXT: # %bb.3:
+; SSE-64-NEXT: movaps %xmm3, %xmm2
+; SSE-64-NEXT: .LBB19_4:
+; SSE-64-NEXT: subss %xmm2, %xmm0
+; SSE-64-NEXT: cvttss2si %xmm0, %rax
+; SSE-64-NEXT: setae %cl
+; SSE-64-NEXT: movzbl %cl, %ecx
+; SSE-64-NEXT: shlq $63, %rcx
; SSE-64-NEXT: xorq %rax, %rcx
; SSE-64-NEXT: movq %rcx, %xmm0
; SSE-64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -1956,28 +2009,34 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-32-NEXT: .cfi_def_cfa_register %ebp
; AVX-32-NEXT: andl $-8, %esp
; AVX-32-NEXT: subl $16, %esp
-; AVX-32-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX-32-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm1, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm1, %xmm3
+; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
+; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-32-NEXT: jb .LBB19_2
+; AVX-32-NEXT: # %bb.1:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm4
+; AVX-32-NEXT: .LBB19_2:
+; AVX-32-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp)
-; AVX-32-NEXT: vcmpltss %xmm2, %xmm0, %xmm3
-; AVX-32-NEXT: vandnps %xmm2, %xmm3, %xmm3
-; AVX-32-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-32-NEXT: vmovss %xmm3, (%esp)
; AVX-32-NEXT: flds {{[0-9]+}}(%esp)
; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp)
-; AVX-32-NEXT: flds (%esp)
-; AVX-32-NEXT: fisttpll (%esp)
-; AVX-32-NEXT: xorl %eax, %eax
-; AVX-32-NEXT: vucomiss %xmm2, %xmm1
; AVX-32-NEXT: setae %al
+; AVX-32-NEXT: movzbl %al, %eax
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX-32-NEXT: xorl %ecx, %ecx
-; AVX-32-NEXT: vucomiss %xmm2, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
+; AVX-32-NEXT: jb .LBB19_4
+; AVX-32-NEXT: # %bb.3:
+; AVX-32-NEXT: vmovaps %xmm1, %xmm2
+; AVX-32-NEXT: .LBB19_4:
+; AVX-32-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-32-NEXT: vmovss %xmm0, (%esp)
+; AVX-32-NEXT: flds (%esp)
+; AVX-32-NEXT: fisttpll (%esp)
; AVX-32-NEXT: setae %cl
+; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
@@ -1992,28 +2051,34 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 {
; AVX-64-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
; AVX-64: # %bb.0:
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm2
-; AVX-64-NEXT: vandnps %xmm1, %xmm2, %xmm2
-; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX-64-NEXT: jb .LBB19_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm3
+; AVX-64-NEXT: .LBB19_2:
+; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
+; AVX-64-NEXT: vcvttss2si %xmm3, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
+; AVX-64-NEXT: vmovq %rcx, %xmm3
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB19_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB19_4:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
; AVX-64-NEXT: retq
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i1:
@@ -2085,58 +2150,66 @@ define <4 x i32> @strict_vector_fptoui_v4f32_to_v4i32(<4 x float> %a) #0 {
; SSE-32: # %bb.0:
; SSE-32-NEXT: movaps %xmm0, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3]
-; SSE-32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm1
+; SSE-32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; SSE-32-NEXT: comiss %xmm3, %xmm1
+; SSE-32-NEXT: xorps %xmm2, %xmm2
+; SSE-32-NEXT: xorps %xmm4, %xmm4
+; SSE-32-NEXT: jb .LBB21_2
+; SSE-32-NEXT: # %bb.1:
+; SSE-32-NEXT: movaps %xmm3, %xmm4
+; SSE-32-NEXT: .LBB21_2:
; SSE-32-NEXT: setae %al
-; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm1, %xmm3
-; SSE-32-NEXT: cmpltss %xmm2, %xmm3
-; SSE-32-NEXT: andnps %xmm2, %xmm3
-; SSE-32-NEXT: subss %xmm3, %xmm1
-; SSE-32-NEXT: cvttss2si %xmm1, %ecx
-; SSE-32-NEXT: xorl %eax, %ecx
-; SSE-32-NEXT: movd %ecx, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm3
-; SSE-32-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm3
+; SSE-32-NEXT: movzbl %al, %ecx
+; SSE-32-NEXT: shll $31, %ecx
+; SSE-32-NEXT: subss %xmm4, %xmm1
+; SSE-32-NEXT: cvttss2si %xmm1, %eax
+; SSE-32-NEXT: xorl %ecx, %eax
+; SSE-32-NEXT: movaps %xmm0, %xmm4
+; SSE-32-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; SSE-32-NEXT: comiss %xmm3, %xmm4
+; SSE-32-NEXT: xorps %xmm5, %xmm5
+; SSE-32-NEXT: jb .LBB21_4
+; SSE-32-NEXT: # %bb.3:
+; SSE-32-NEXT: movaps %xmm3, %xmm5
+; SSE-32-NEXT: .LBB21_4:
+; SSE-32-NEXT: movd %eax, %xmm1
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm3, %xmm4
-; SSE-32-NEXT: cmpltss %xmm2, %xmm4
-; SSE-32-NEXT: andnps %xmm2, %xmm4
-; SSE-32-NEXT: subss %xmm4, %xmm3
-; SSE-32-NEXT: cvttss2si %xmm3, %ecx
+; SSE-32-NEXT: subss %xmm5, %xmm4
+; SSE-32-NEXT: cvttss2si %xmm4, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
-; SSE-32-NEXT: movd %ecx, %xmm3
-; SSE-32-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: movd %ecx, %xmm4
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: xorps %xmm5, %xmm5
+; SSE-32-NEXT: jb .LBB21_6
+; SSE-32-NEXT: # %bb.5:
+; SSE-32-NEXT: movaps %xmm3, %xmm5
+; SSE-32-NEXT: .LBB21_6:
+; SSE-32-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
; SSE-32-NEXT: movaps %xmm0, %xmm1
-; SSE-32-NEXT: cmpltss %xmm2, %xmm1
-; SSE-32-NEXT: andnps %xmm2, %xmm1
-; SSE-32-NEXT: movaps %xmm0, %xmm4
-; SSE-32-NEXT: subss %xmm1, %xmm4
-; SSE-32-NEXT: cvttss2si %xmm4, %ecx
+; SSE-32-NEXT: subss %xmm5, %xmm1
+; SSE-32-NEXT: cvttss2si %xmm1, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm1
; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE-32-NEXT: xorl %eax, %eax
-; SSE-32-NEXT: ucomiss %xmm2, %xmm0
+; SSE-32-NEXT: comiss %xmm3, %xmm0
+; SSE-32-NEXT: jb .LBB21_8
+; SSE-32-NEXT: # %bb.7:
+; SSE-32-NEXT: movaps %xmm3, %xmm2
+; SSE-32-NEXT: .LBB21_8:
; SSE-32-NEXT: setae %al
+; SSE-32-NEXT: movzbl %al, %eax
; SSE-32-NEXT: shll $31, %eax
-; SSE-32-NEXT: movaps %xmm0, %xmm4
-; SSE-32-NEXT: cmpltss %xmm2, %xmm4
-; SSE-32-NEXT: andnps %xmm2, %xmm4
-; SSE-32-NEXT: subss %xmm4, %xmm0
+; SSE-32-NEXT: subss %xmm2, %xmm0
; SSE-32-NEXT: cvttss2si %xmm0, %ecx
; SSE-32-NEXT: xorl %eax, %ecx
; SSE-32-NEXT: movd %ecx, %xmm0
; SSE-32-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
+; SSE-32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
; SSE-32-NEXT: movdqa %xmm1, %xmm0
; SSE-32-NEXT: retl
;
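[Across these fptoui tests the generated code implements the usual range split for unsigned conversion on hardware that only has a signed cvttss2si: compare against 2^63, subtract it from values in the upper half, convert, then xor the sign bit back in. A minimal C sketch of the branchy form the new CHECK lines encode; the function and variable names are illustrative, not part of the patch:

    #include <stdint.h>

    uint64_t fptoui_sketch(float x) {
        const float limit = 9223372036854775808.0f;     /* 2^63, the constant-pool load */
        float adjust = (x >= limit) ? limit : 0.0f;     /* comiss + jb over a movaps    */
        uint64_t low = (uint64_t)(int64_t)(x - adjust); /* subss + cvttss2si            */
        uint64_t high = (uint64_t)(x >= limit);         /* setae                        */
        return low ^ (high << 63);                      /* shlq $63 + xorq              */
    }

The jb-over-a-move branch replaces the earlier cmpltss/andnps mask select, and the movzbl of the setae result replaces the xorl zeroing of %eax, since the flag is now consumed after the conversion rather than before it.]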
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
index bcb002823d9..053d708c4af 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll
@@ -163,7 +163,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_2
@@ -180,7 +180,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX-32-NEXT: vcomisd %xmm1, %xmm4
; AVX-32-NEXT: vxorpd %xmm5, %xmm5, %xmm5
; AVX-32-NEXT: jb .LBB1_4
; AVX-32-NEXT: # %bb.3:
@@ -194,7 +194,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: movzbl %cl, %ecx
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
-; AVX-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX-32-NEXT: vcomisd %xmm1, %xmm3
; AVX-32-NEXT: vxorpd %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB1_6
; AVX-32-NEXT: # %bb.5:
@@ -208,7 +208,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX-32-NEXT: vcomisd %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB1_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vmovapd %xmm1, %xmm2
@@ -237,53 +237,65 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX-64-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX-64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm2
+; AVX-64-NEXT: vcomisd %xmm1, %xmm3
+; AVX-64-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB1_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm4
+; AVX-64-NEXT: .LBB1_2:
+; AVX-64-NEXT: vsubsd %xmm4, %xmm3, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rcx
; AVX-64-NEXT: setae %al
+; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm2, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubsd %xmm3, %xmm2, %xmm3
-; AVX-64-NEXT: vcvttsd2si %xmm3, %rcx
+; AVX-64-NEXT: xorq %rcx, %rax
+; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
+; AVX-64-NEXT: vcomisd %xmm1, %xmm4
+; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB1_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm5
+; AVX-64-NEXT: .LBB1_4:
+; AVX-64-NEXT: vmovq %rax, %xmm3
+; AVX-64-NEXT: vsubsd %xmm5, %xmm4, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
-; AVX-64-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm2
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm2, %xmm4
-; AVX-64-NEXT: vandnpd %xmm1, %xmm4, %xmm4
-; AVX-64-NEXT: vsubsd %xmm4, %xmm2, %xmm2
-; AVX-64-NEXT: vcvttsd2si %xmm2, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnpd %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubsd %xmm3, %xmm0, %xmm3
-; AVX-64-NEXT: vcvttsd2si %xmm3, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm4
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB1_6
+; AVX-64-NEXT: # %bb.5:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm5
+; AVX-64-NEXT: .LBB1_6:
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; AVX-64-NEXT: vsubsd %xmm5, %xmm0, %xmm4
+; AVX-64-NEXT: vcvttsd2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomisd %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltsd %xmm1, %xmm0, %xmm4
-; AVX-64-NEXT: vandnpd %xmm1, %xmm4, %xmm1
-; AVX-64-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX-64-NEXT: vcomisd %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB1_8
+; AVX-64-NEXT: # %bb.7:
+; AVX-64-NEXT: vmovapd %xmm1, %xmm2
+; AVX-64-NEXT: .LBB1_8:
+; AVX-64-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttsd2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
-; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f64_to_v4i64:
@@ -301,7 +313,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorpd %xmm3, %xmm3, %xmm3
@@ -319,7 +331,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm2[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -332,7 +344,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -345,7 +357,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %ebx, %ebx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovsd %xmm3, %xmm1, %xmm1 {%k1}
@@ -532,7 +544,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: subl $32, %esp
; AVX-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_2
@@ -548,7 +560,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %eax
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_4
; AVX-32-NEXT: # %bb.3:
@@ -563,7 +575,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: shll $31, %ecx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX-32-NEXT: vcomiss %xmm1, %xmm3
; AVX-32-NEXT: vxorps %xmm4, %xmm4, %xmm4
; AVX-32-NEXT: jb .LBB3_6
; AVX-32-NEXT: # %bb.5:
@@ -577,7 +589,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX-32-NEXT: movzbl %dl, %edx
; AVX-32-NEXT: shll $31, %edx
; AVX-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
-; AVX-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX-32-NEXT: vcomiss %xmm1, %xmm0
; AVX-32-NEXT: jb .LBB3_8
; AVX-32-NEXT: # %bb.7:
; AVX-32-NEXT: vmovaps %xmm1, %xmm2
@@ -606,53 +618,65 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
;
; AVX-64-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
; AVX-64: # %bb.0:
-; AVX-64-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; AVX-64-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
; AVX-64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm2
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm2, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubss %xmm3, %xmm2, %xmm2
-; AVX-64-NEXT: vcvttss2si %xmm2, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm2
-; AVX-64-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm3
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm3, %xmm4
-; AVX-64-NEXT: vandnps %xmm1, %xmm4, %xmm4
+; AVX-64-NEXT: vcomiss %xmm1, %xmm3
+; AVX-64-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-64-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX-64-NEXT: jb .LBB3_2
+; AVX-64-NEXT: # %bb.1:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm4
+; AVX-64-NEXT: .LBB3_2:
; AVX-64-NEXT: vsubss %xmm4, %xmm3, %xmm3
; AVX-64-NEXT: vcvttss2si %xmm3, %rcx
-; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
; AVX-64-NEXT: setae %al
+; AVX-64-NEXT: movzbl %al, %eax
; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm3
-; AVX-64-NEXT: vandnps %xmm1, %xmm3, %xmm3
-; AVX-64-NEXT: vsubss %xmm3, %xmm0, %xmm3
-; AVX-64-NEXT: vcvttss2si %xmm3, %rcx
+; AVX-64-NEXT: xorq %rcx, %rax
+; AVX-64-NEXT: vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
+; AVX-64-NEXT: vcomiss %xmm1, %xmm4
+; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB3_4
+; AVX-64-NEXT: # %bb.3:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm5
+; AVX-64-NEXT: .LBB3_4:
+; AVX-64-NEXT: vmovq %rax, %xmm3
+; AVX-64-NEXT: vsubss %xmm5, %xmm4, %xmm4
+; AVX-64-NEXT: vcvttss2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
-; AVX-64-NEXT: vmovq %rcx, %xmm3
+; AVX-64-NEXT: vmovq %rcx, %xmm4
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX-64-NEXT: jb .LBB3_6
+; AVX-64-NEXT: # %bb.5:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm5
+; AVX-64-NEXT: .LBB3_6:
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm3 = xmm4[0],xmm3[0]
+; AVX-64-NEXT: vsubss %xmm5, %xmm0, %xmm4
+; AVX-64-NEXT: vcvttss2si %xmm4, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
+; AVX-64-NEXT: xorq %rax, %rcx
+; AVX-64-NEXT: vmovq %rcx, %xmm4
; AVX-64-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; AVX-64-NEXT: xorl %eax, %eax
-; AVX-64-NEXT: vucomiss %xmm1, %xmm0
-; AVX-64-NEXT: setae %al
-; AVX-64-NEXT: shlq $63, %rax
-; AVX-64-NEXT: vcmpltss %xmm1, %xmm0, %xmm4
-; AVX-64-NEXT: vandnps %xmm1, %xmm4, %xmm1
-; AVX-64-NEXT: vsubss %xmm1, %xmm0, %xmm0
-; AVX-64-NEXT: vcvttss2si %xmm0, %rcx
+; AVX-64-NEXT: vcomiss %xmm1, %xmm0
+; AVX-64-NEXT: jb .LBB3_8
+; AVX-64-NEXT: # %bb.7:
+; AVX-64-NEXT: vmovaps %xmm1, %xmm2
+; AVX-64-NEXT: .LBB3_8:
+; AVX-64-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX-64-NEXT: vcvttss2si %xmm0, %rax
+; AVX-64-NEXT: setae %cl
+; AVX-64-NEXT: movzbl %cl, %ecx
+; AVX-64-NEXT: shlq $63, %rcx
; AVX-64-NEXT: xorq %rax, %rcx
; AVX-64-NEXT: vmovq %rcx, %xmm0
-; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm3[0],xmm0[0]
-; AVX-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX-64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
+; AVX-64-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX-64-NEXT: retq
;
; AVX512VL-32-LABEL: strict_vector_fptoui_v4f32_to_v4i64:
@@ -670,7 +694,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %cl
; AVX512VL-32-NEXT: kmovw %ecx, %k1
; AVX512VL-32-NEXT: vxorps %xmm3, %xmm3, %xmm3
@@ -687,7 +711,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm2 = xmm0[3,1,2,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %dl
; AVX512VL-32-NEXT: kmovw %edx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -701,7 +725,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm2 = xmm0[1,0]
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm2
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm2
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -714,7 +738,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %edx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: xorl %ebx, %ebx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovss %xmm3, %xmm1, %xmm1 {%k1}
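[The ucomis* to comis* substitution repeated through these hunks is the behavioral heart of the change: ucomiss/ucomisd are quiet compares that raise the invalid exception only for signaling NaNs, while comiss/comisd signal on any NaN, matching what a constrained intrinsic with fpexcept.strict must observe. The same quiet-versus-signaling split exists at the C source level, as this small probe shows; whether a given compiler honors FENV_ACCESS is itself an assumption, so run it at -O0 if in doubt:

    #include <fenv.h>
    #include <math.h>
    #include <stdio.h>

    #pragma STDC FENV_ACCESS ON

    int main(void) {
        volatile double qnan = nan("");             /* quiet NaN operand */
        feclearexcept(FE_ALL_EXCEPT);
        (void)isgreater(qnan, 0.0);                 /* quiet compare, like ucomisd */
        int quiet = fetestexcept(FE_INVALID) != 0;  /* expected: 0 */
        feclearexcept(FE_ALL_EXCEPT);
        (void)(qnan > 0.0);                         /* signaling compare, like comisd */
        int loud = fetestexcept(FE_INVALID) != 0;   /* expected: 1 */
        printf("quiet raised: %d, signaling raised: %d\n", quiet, loud);
        return 0;
    }
]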
diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
index 26806db74a1..c5bf545ce77 100644
--- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
+++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll
@@ -150,7 +150,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vxorpd %xmm2, %xmm2, %xmm2
@@ -167,7 +167,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -181,7 +181,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -197,7 +197,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -212,7 +212,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -227,7 +227,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: vextractf32x4 $3, %zmm0, %xmm3
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm5
@@ -240,7 +240,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: shll $31, %ecx
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovapd %xmm1, %xmm4
@@ -252,7 +252,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: vucomisd %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomisd %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovsd %xmm2, %xmm1, %xmm1 {%k1}
@@ -454,7 +454,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: .cfi_offset %ebx, -12
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; AVX512VL-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vxorps %xmm2, %xmm2, %xmm2
@@ -470,7 +470,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm3 = xmm0[3,1,2,3]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -485,7 +485,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -501,7 +501,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: movl %eax, %edi
; AVX512VL-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX512VL-32-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
@@ -516,7 +516,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
; AVX512VL-32-NEXT: movl %eax, %esi
; AVX512VL-32-NEXT: xorl %edx, %edx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -530,7 +530,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx
; AVX512VL-32-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
; AVX512VL-32-NEXT: xorl %ecx, %ecx
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm4
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm4
; AVX512VL-32-NEXT: setb %al
; AVX512VL-32-NEXT: kmovw %eax, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm5
@@ -544,7 +544,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx
; AVX512VL-32-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
; AVX512VL-32-NEXT: xorl %eax, %eax
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm3
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm3
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovaps %xmm1, %xmm4
@@ -556,7 +556,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 {
; AVX512VL-32-NEXT: setae %al
; AVX512VL-32-NEXT: shll $31, %eax
; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax
-; AVX512VL-32-NEXT: vucomiss %xmm1, %xmm0
+; AVX512VL-32-NEXT: vcomiss %xmm1, %xmm0
; AVX512VL-32-NEXT: setb %bl
; AVX512VL-32-NEXT: kmovw %ebx, %k1
; AVX512VL-32-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
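[The AVX512VL-32 sequences above express the same 2^63 adjustment without a branch: vcomisd/vcomiss still performs the signaling compare, but setb feeds a k-register through kmovw, and a masked vmovsd/vmovss zeroes the adjustment for in-range (or unordered) inputs. A branchless restatement of the earlier sketch, under the same naming assumptions:

    #include <stdint.h>

    uint64_t fptoui_mask_sketch(double x) {
        const double limit = 9223372036854775808.0; /* 2^63 */
        int below = !(x >= limit);                  /* vcomisd + setb -> k1; true on NaN too */
        double adjust = below ? 0.0 : limit;        /* masked vmovsd merges zero over limit  */
        uint64_t low = (uint64_t)(int64_t)(x - adjust);
        uint64_t high = (uint64_t)(x >= limit);     /* setae */
        return low ^ (high << 63);
    }
]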
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 40ff465a9dd..cca16cc0d70 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -4465,18 +4465,38 @@ entry:
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
+; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: ja .LBB115_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm1
+; CHECK-NEXT: .LBB115_2: # %entry
+; CHECK-NEXT: subss %xmm1, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vcvttss2si %xmm0, %rax
+; AVX1-NEXT: vcomiss %xmm0, %xmm1
+; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: ja .LBB115_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm1, %xmm2
+; AVX1-NEXT: .LBB115_2: # %entry
+; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttss2si %xmm0, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32:
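[One wrinkle in the constant-folded tests below: both the value and 2^63 are constant-pool loads, so the compare is emitted with the operands swapped (comiss %xmm_value, %xmm_limit) and the branch senses flip accordingly, ja/setbe in place of jb/setae. A small sketch of why the predicates line up; for the non-NaN constants these tests use, the two forms agree:

    #include <stdbool.h>

    /* jb/setae form: compare the value against the limit */
    bool fixup_direct(float val, float limit)  { return val >= limit; }    /* setae */
    /* ja/setbe form: compare the limit against the value */
    bool fixup_swapped(float val, float limit) { return !(limit > val); }  /* setbe */
    /* The forms differ only on NaN (setbe is also true when unordered),
       which cannot occur for the constant operands in these tests. */
]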
@@ -4493,30 +4513,70 @@ entry:
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
-; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB116_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB116_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
; CHECK-NEXT: cvttss2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm3, %xmm1
+; CHECK-NEXT: ja .LBB116_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: .LBB116_4: # %entry
+; CHECK-NEXT: subss %xmm0, %xmm3
+; CHECK-NEXT: cvttss2si %xmm3, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB116_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB116_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttss2si %xmm2, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB116_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB116_4: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v2i64_v2f32:
@@ -4537,35 +4597,95 @@ entry:
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rax
-; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rdx
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm0, %xmm1
-; CHECK-NEXT: cvttss2si %xmm1, %rcx
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm0, %xmm0
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB117_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB117_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB117_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm3
+; CHECK-NEXT: .LBB117_4: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rcx
+; CHECK-NEXT: setbe %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: shlq $63, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm2, %xmm1
+; CHECK-NEXT: ja .LBB117_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movaps %xmm1, %xmm0
+; CHECK-NEXT: .LBB117_6: # %entry
+; CHECK-NEXT: subss %xmm0, %xmm2
+; CHECK-NEXT: cvttss2si %xmm2, %rsi
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rsi, %rcx
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB117_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB117_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB117_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm4
+; AVX1-NEXT: .LBB117_4: # %entry
+; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB117_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB117_6: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32:
@@ -4590,49 +4710,129 @@ entry:
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorps %xmm1, %xmm1
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm0
-; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm2
+; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm1, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB118_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm3
+; CHECK-NEXT: .LBB118_2: # %entry
+; CHECK-NEXT: subss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm0
+; CHECK-NEXT: comiss %xmm0, %xmm2
+; CHECK-NEXT: xorps %xmm4, %xmm4
+; CHECK-NEXT: ja .LBB118_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm4
+; CHECK-NEXT: .LBB118_4: # %entry
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: subss %xmm4, %xmm0
; CHECK-NEXT: cvttss2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm2
-; CHECK-NEXT: cvttss2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; CHECK-NEXT: subss %xmm1, %xmm3
-; CHECK-NEXT: cvttss2si %xmm3, %rax
-; CHECK-NEXT: movq %rax, %xmm1
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm4, %xmm2
+; CHECK-NEXT: xorps %xmm5, %xmm5
+; CHECK-NEXT: ja .LBB118_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm5
+; CHECK-NEXT: .LBB118_6: # %entry
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: subss %xmm5, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm3
+; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; CHECK-NEXT: comiss %xmm4, %xmm2
+; CHECK-NEXT: ja .LBB118_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: movaps %xmm2, %xmm1
+; CHECK-NEXT: .LBB118_8: # %entry
+; CHECK-NEXT: subss %xmm1, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttss2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
-; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; AVX1-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttss2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm2, %xmm0
+; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB118_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm3
+; AVX1-NEXT: .LBB118_2: # %entry
+; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttss2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; AVX1-NEXT: vsubss %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vcomiss %xmm3, %xmm0
+; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB118_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm4
+; AVX1-NEXT: .LBB118_4: # %entry
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm4, %xmm0
+; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: ja .LBB118_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm5
+; AVX1-NEXT: .LBB118_6: # %entry
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vcvttss2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero
+; AVX1-NEXT: vcomiss %xmm4, %xmm0
+; AVX1-NEXT: ja .LBB118_8
+; AVX1-NEXT: # %bb.7: # %entry
+; AVX1-NEXT: vmovaps %xmm0, %xmm1
+; AVX1-NEXT: .LBB118_8: # %entry
+; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttss2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32:
@@ -4810,18 +5010,38 @@ entry:
define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: ja .LBB123_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: .LBB123_2: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vcvttsd2si %xmm0, %rax
+; AVX1-NEXT: vcomisd %xmm0, %xmm1
+; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: ja .LBB123_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm1, %xmm2
+; AVX1-NEXT: .LBB123_2: # %entry
+; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vcvttsd2si %xmm0, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64:
@@ -4838,30 +5058,70 @@ entry:
define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
-; CHECK-NEXT: movq %rax, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm0, %xmm0
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB124_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB124_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
; CHECK-NEXT: cvttsd2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm2
+; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT: comisd %xmm3, %xmm1
+; CHECK-NEXT: ja .LBB124_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: .LBB124_4: # %entry
+; CHECK-NEXT: subsd %xmm0, %xmm3
+; CHECK-NEXT: cvttsd2si %xmm3, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB124_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB124_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttsd2si %xmm2, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB124_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB124_4: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64:
@@ -4890,35 +5150,95 @@ entry:
define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm0, %xmm0
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rax
-; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rdx
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: subsd %xmm0, %xmm1
-; CHECK-NEXT: cvttsd2si %xmm1, %rcx
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm0, %xmm0
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB125_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB125_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB125_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm3
+; CHECK-NEXT: .LBB125_4: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rcx
+; CHECK-NEXT: setbe %dl
+; CHECK-NEXT: movzbl %dl, %edx
+; CHECK-NEXT: shlq $63, %rdx
+; CHECK-NEXT: xorq %rcx, %rdx
+; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT: comisd %xmm2, %xmm1
+; CHECK-NEXT: ja .LBB125_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movapd %xmm1, %xmm0
+; CHECK-NEXT: .LBB125_6: # %entry
+; CHECK-NEXT: subsd %xmm0, %xmm2
+; CHECK-NEXT: cvttsd2si %xmm2, %rsi
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rsi, %rcx
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB125_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB125_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB125_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm4
+; AVX1-NEXT: .LBB125_4: # %entry
+; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: ja .LBB125_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB125_6: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64:
@@ -4943,49 +5263,129 @@ entry:
define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 {
; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xorpd %xmm1, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm0
-; CHECK-NEXT: cvttsd2si %xmm0, %rax
-; CHECK-NEXT: movq %rax, %xmm0
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm2
-; CHECK-NEXT: cvttsd2si %xmm2, %rax
-; CHECK-NEXT: movq %rax, %xmm2
-; CHECK-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; CHECK-NEXT: subsd %xmm1, %xmm3
-; CHECK-NEXT: cvttsd2si %xmm3, %rax
-; CHECK-NEXT: movq %rax, %xmm1
-; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: xorpd %xmm3, %xmm3
+; CHECK-NEXT: ja .LBB126_2
+; CHECK-NEXT: # %bb.1: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm3
+; CHECK-NEXT: .LBB126_2: # %entry
+; CHECK-NEXT: subsd %xmm3, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rcx
+; CHECK-NEXT: setbe %al
+; CHECK-NEXT: movzbl %al, %eax
+; CHECK-NEXT: shlq $63, %rax
+; CHECK-NEXT: xorq %rcx, %rax
+; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: comisd %xmm0, %xmm2
+; CHECK-NEXT: xorpd %xmm4, %xmm4
+; CHECK-NEXT: ja .LBB126_4
+; CHECK-NEXT: # %bb.3: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm4
+; CHECK-NEXT: .LBB126_4: # %entry
+; CHECK-NEXT: movq %rax, %xmm3
+; CHECK-NEXT: subsd %xmm4, %xmm0
+; CHECK-NEXT: cvttsd2si %xmm0, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm0
+; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: comisd %xmm4, %xmm2
+; CHECK-NEXT: xorpd %xmm5, %xmm5
+; CHECK-NEXT: ja .LBB126_6
+; CHECK-NEXT: # %bb.5: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm5
+; CHECK-NEXT: .LBB126_6: # %entry
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; CHECK-NEXT: subsd %xmm5, %xmm4
+; CHECK-NEXT: cvttsd2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm3
+; CHECK-NEXT: movsd {{.*#+}} xmm4 = mem[0],zero
+; CHECK-NEXT: comisd %xmm4, %xmm2
+; CHECK-NEXT: ja .LBB126_8
+; CHECK-NEXT: # %bb.7: # %entry
+; CHECK-NEXT: movapd %xmm2, %xmm1
+; CHECK-NEXT: .LBB126_8: # %entry
+; CHECK-NEXT: subsd %xmm1, %xmm4
+; CHECK-NEXT: cvttsd2si %xmm4, %rax
+; CHECK-NEXT: setbe %cl
+; CHECK-NEXT: movzbl %cl, %ecx
+; CHECK-NEXT: shlq $63, %rcx
+; CHECK-NEXT: xorq %rax, %rcx
+; CHECK-NEXT: movq %rcx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vcvttsd2si %xmm1, %rax
-; AVX1-NEXT: vmovq %rax, %xmm1
; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
-; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm2, %xmm2
-; AVX1-NEXT: vcvttsd2si %xmm2, %rax
-; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm2, %xmm0
+; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3
+; AVX1-NEXT: ja .LBB126_2
+; AVX1-NEXT: # %bb.1: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm3
+; AVX1-NEXT: .LBB126_2: # %entry
+; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vcvttsd2si %xmm2, %rcx
+; AVX1-NEXT: setbe %al
+; AVX1-NEXT: movzbl %al, %eax
+; AVX1-NEXT: shlq $63, %rax
+; AVX1-NEXT: xorq %rcx, %rax
; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX1-NEXT: vsubsd %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vcomisd %xmm3, %xmm0
+; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4
+; AVX1-NEXT: ja .LBB126_4
+; AVX1-NEXT: # %bb.3: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm4
+; AVX1-NEXT: .LBB126_4: # %entry
+; AVX1-NEXT: vmovq %rax, %xmm2
+; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm4, %xmm0
+; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5
+; AVX1-NEXT: ja .LBB126_6
+; AVX1-NEXT: # %bb.5: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm5
+; AVX1-NEXT: .LBB126_6: # %entry
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0]
+; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3
+; AVX1-NEXT: vcvttsd2si %xmm3, %rax
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm3
+; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero
+; AVX1-NEXT: vcomisd %xmm4, %xmm0
+; AVX1-NEXT: ja .LBB126_8
+; AVX1-NEXT: # %bb.7: # %entry
+; AVX1-NEXT: vmovapd %xmm0, %xmm1
+; AVX1-NEXT: .LBB126_8: # %entry
+; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0
; AVX1-NEXT: vcvttsd2si %xmm0, %rax
-; AVX1-NEXT: vmovq %rax, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: setbe %cl
+; AVX1-NEXT: movzbl %cl, %ecx
+; AVX1-NEXT: shlq $63, %rcx
+; AVX1-NEXT: xorq %rax, %rcx
+; AVX1-NEXT: vmovq %rcx, %xmm0
+; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64:
@@ -6384,34 +6784,34 @@ entry:
define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: js .LBB170_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB170_1:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
+; CHECK-NEXT: jns .LBB170_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB170_2: # %entry
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: testq %rdi, %rdi
-; AVX1-NEXT: js .LBB170_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB170_1:
; AVX1-NEXT: movq %rdi, %rax
; AVX1-NEXT: shrq %rax
-; AVX1-NEXT: andl $1, %edi
-; AVX1-NEXT: orq %rax, %rdi
-; AVX1-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
+; AVX1-NEXT: movl %edi, %ecx
+; AVX1-NEXT: andl $1, %ecx
+; AVX1-NEXT: orq %rax, %rcx
+; AVX1-NEXT: testq %rdi, %rdi
+; AVX1-NEXT: cmovnsq %rdi, %rcx
+; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0
+; AVX1-NEXT: jns .LBB170_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB170_2: # %entry
; AVX1-NEXT: retq
;
; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64:
@@ -6548,74 +6948,65 @@ define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB174_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB174_5
-; CHECK-NEXT: .LBB174_4:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB174_1:
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
+; CHECK-NEXT: jns .LBB174_2
+; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB174_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB174_4
-; CHECK-NEXT: .LBB174_5: # %entry
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: jns .LBB174_4
+; CHECK-NEXT: # %bb.3:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB174_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB174_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB174_5
-; AVX1-NEXT: .LBB174_4:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB174_1:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB174_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB174_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB174_4
-; AVX1-NEXT: .LBB174_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm0
+; AVX1-NEXT: jns .LBB174_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB174_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX1-NEXT: retq
;
@@ -6805,100 +7196,90 @@ entry:
define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq %rsi, %rax
+; CHECK-NEXT: shrq %rax
+; CHECK-NEXT: movl %esi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: testq %rsi, %rsi
-; CHECK-NEXT: js .LBB178_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: jns .LBB178_5
-; CHECK-NEXT: .LBB178_4:
+; CHECK-NEXT: cmovnsq %rsi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
+; CHECK-NEXT: jns .LBB178_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB178_2: # %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
+; CHECK-NEXT: movl %edi, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: cmovnsq %rdi, %rcx
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm0
+; CHECK-NEXT: jns .LBB178_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB178_4: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: jns .LBB178_8
-; CHECK-NEXT: .LBB178_7:
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %edx
-; CHECK-NEXT: orq %rax, %rdx
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB178_1:
-; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: shrq %rax
-; CHECK-NEXT: andl $1, %esi
-; CHECK-NEXT: orq %rax, %rsi
-; CHECK-NEXT: cvtsi2ss %rsi, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: js .LBB178_4
-; CHECK-NEXT: .LBB178_5: # %entry
-; CHECK-NEXT: cvtsi2ss %rdi, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT: movl %edx, %ecx
+; CHECK-NEXT: andl $1, %ecx
+; CHECK-NEXT: orq %rax, %rcx
; CHECK-NEXT: testq %rdx, %rdx
-; CHECK-NEXT: js .LBB178_7
-; CHECK-NEXT: .LBB178_8: # %entry
+; CHECK-NEXT: cmovnsq %rdx, %rcx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: cvtsi2ss %rcx, %xmm1
+; CHECK-NEXT: jns .LBB178_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB178_6: # %entry
; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
;
; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB178_5
-; AVX1-NEXT: .LBB178_4:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: jmp .LBB178_6
-; AVX1-NEXT: .LBB178_1:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB178_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB178_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_4
-; AVX1-NEXT: .LBB178_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: .LBB178_6: # %entry
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-NEXT: jns .LBB178_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB178_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB178_7
-; AVX1-NEXT: # %bb.8: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB178_7:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-NEXT: jns .LBB178_6
+; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB178_6: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -6949,10 +7330,10 @@ define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2pd %xmm0, %ymm0
; AVX1-NEXT: vmulpd {{.*}}(%rip), %ymm0, %ymm0
+; AVX1-NEXT: vcvtdq2pd %xmm1, %ymm1
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -6978,12 +7359,12 @@ entry:
define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movaps {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; CHECK-NEXT: andps %xmm0, %xmm1
+; CHECK-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NEXT: psrld $16, %xmm1
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT: psrld $16, %xmm0
+; CHECK-NEXT: mulps {{.*}}(%rip), %xmm1
+; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
-; CHECK-NEXT: mulps {{.*}}(%rip), %xmm0
; CHECK-NEXT: addps %xmm1, %xmm0
; CHECK-NEXT: retq
;
@@ -6991,10 +7372,10 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 {
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX1-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vcvtdq2ps %xmm1, %xmm1
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
@@ -7078,73 +7459,62 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: shrq %rcx
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_1
-; CHECK-NEXT: # %bb.2: # %entry
-; CHECK-NEXT: cvtsi2ss %rax, %xmm2
+; CHECK-NEXT: cmovnsq %rax, %rdx
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm2
+; CHECK-NEXT: jns .LBB182_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: addss %xmm2, %xmm2
+; CHECK-NEXT: .LBB182_2: # %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB182_5
-; CHECK-NEXT: .LBB182_4:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: cvtsi2ss %rax, %xmm3
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm3
+; CHECK-NEXT: jns .LBB182_4
+; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addss %xmm3, %xmm3
+; CHECK-NEXT: .LBB182_4: # %entry
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: jns .LBB182_8
-; CHECK-NEXT: .LBB182_7:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: addss %xmm1, %xmm1
-; CHECK-NEXT: jmp .LBB182_9
-; CHECK-NEXT: .LBB182_1:
-; CHECK-NEXT: movq %rax, %rcx
-; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
-; CHECK-NEXT: cvtsi2ss %rax, %xmm2
-; CHECK-NEXT: addss %xmm2, %xmm2
-; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; CHECK-NEXT: movq %xmm1, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_4
-; CHECK-NEXT: .LBB182_5: # %entry
-; CHECK-NEXT: cvtsi2ss %rax, %xmm3
-; CHECK-NEXT: movq %xmm0, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_7
-; CHECK-NEXT: .LBB182_8: # %entry
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm1, %xmm1
-; CHECK-NEXT: cvtsi2ss %rax, %xmm1
-; CHECK-NEXT: .LBB182_9: # %entry
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm1
+; CHECK-NEXT: jns .LBB182_6
+; CHECK-NEXT: # %bb.5:
+; CHECK-NEXT: addss %xmm1, %xmm1
+; CHECK-NEXT: .LBB182_6: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; CHECK-NEXT: movq %xmm0, %rax
-; CHECK-NEXT: testq %rax, %rax
-; CHECK-NEXT: js .LBB182_10
-; CHECK-NEXT: # %bb.11: # %entry
-; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
-; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
-; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: retq
-; CHECK-NEXT: .LBB182_10:
; CHECK-NEXT: movq %rax, %rcx
; CHECK-NEXT: shrq %rcx
-; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: orq %rcx, %rax
+; CHECK-NEXT: movl %eax, %edx
+; CHECK-NEXT: andl $1, %edx
+; CHECK-NEXT: orq %rcx, %rdx
+; CHECK-NEXT: testq %rax, %rax
+; CHECK-NEXT: cmovnsq %rax, %rdx
; CHECK-NEXT: xorps %xmm0, %xmm0
-; CHECK-NEXT: cvtsi2ss %rax, %xmm0
+; CHECK-NEXT: cvtsi2ss %rdx, %xmm0
+; CHECK-NEXT: jns .LBB182_8
+; CHECK-NEXT: # %bb.7:
; CHECK-NEXT: addss %xmm0, %xmm0
+; CHECK-NEXT: .LBB182_8: # %entry
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
@@ -7153,68 +7523,60 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_1
-; AVX1-NEXT: # %bb.2: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB182_5
-; AVX1-NEXT: .LBB182_4:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: jmp .LBB182_6
-; AVX1-NEXT: .LBB182_1:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1
+; AVX1-NEXT: jns .LBB182_2
+; AVX1-NEXT: # %bb.1:
; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1
+; AVX1-NEXT: .LBB182_2: # %entry
; AVX1-NEXT: vmovq %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_4
-; AVX1-NEXT: .LBB182_5: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX1-NEXT: .LBB182_6: # %entry
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2
+; AVX1-NEXT: jns .LBB182_4
+; AVX1-NEXT: # %bb.3:
+; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB182_4: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_7
-; AVX1-NEXT: # %bb.8: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
-; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
-; AVX1-NEXT: vpextrq $1, %xmm0, %rax
-; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: jns .LBB182_11
-; AVX1-NEXT: .LBB182_10:
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
-; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
-; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-; AVX1-NEXT: .LBB182_7:
-; AVX1-NEXT: movq %rax, %rcx
-; AVX1-NEXT: shrq %rcx
-; AVX1-NEXT: andl $1, %eax
-; AVX1-NEXT: orq %rcx, %rax
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
+; AVX1-NEXT: testq %rax, %rax
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm2
+; AVX1-NEXT: jns .LBB182_6
+; AVX1-NEXT: # %bb.5:
; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: .LBB182_6: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
+; AVX1-NEXT: movq %rax, %rcx
+; AVX1-NEXT: shrq %rcx
+; AVX1-NEXT: movl %eax, %edx
+; AVX1-NEXT: andl $1, %edx
+; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: testq %rax, %rax
-; AVX1-NEXT: js .LBB182_10
-; AVX1-NEXT: .LBB182_11: # %entry
-; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
+; AVX1-NEXT: cmovnsq %rax, %rdx
+; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0
+; AVX1-NEXT: jns .LBB182_8
+; AVX1-NEXT: # %bb.7:
+; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0
+; AVX1-NEXT: .LBB182_8: # %entry
; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
@@ -7238,39 +7600,28 @@ define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 {
;
; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64:
; AVX512DQ: # %bb.0: # %entry
-; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
-; AVX512DQ-NEXT: vmovq %xmm0, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm1
; AVX512DQ-NEXT: vpbroadcastq {{.*#+}} ymm2 = [1,1,1,1]
; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm2
; AVX512DQ-NEXT: vpsrlq $1, %ymm0, %ymm3
; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
+; AVX512DQ-NEXT: vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
+; AVX512DQ-NEXT: vmovq %xmm0, %rax
; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3]
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm2
-; AVX512DQ-NEXT: vmovq %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rax
-; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm5, %xmm2
-; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
-; AVX512DQ-NEXT: vaddps %xmm2, %xmm2, %xmm2
-; AVX512DQ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; AVX512DQ-NEXT: vpcmpgtq %ymm0, %ymm3, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT: vmovq %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
+; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512DQ-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
+; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512DQ-NEXT: vaddps %xmm0, %xmm0, %xmm2
+; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512DQ-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
entry: