-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td    |    7
-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td       |  116
-rw-r--r--   llvm/test/CodeGen/X86/avx512-cvt.ll      |   48
-rw-r--r--   llvm/test/CodeGen/X86/ftrunc.ll          |   12
-rw-r--r--   llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll  |    2
-rw-r--r--   llvm/test/CodeGen/X86/pr42905.ll         |    2
-rw-r--r--   llvm/test/CodeGen/X86/sqrt-partial.ll    |    4
-rw-r--r--   llvm/test/CodeGen/X86/undef-label.ll     |    2
-rw-r--r--   llvm/test/CodeGen/X86/vec_fp_to_int.ll   |   16
-rw-r--r--   llvm/test/CodeGen/X86/vec_int_to_fp.ll   |  106
10 files changed, 170 insertions, 145 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 637102e47fd..249da7b888f 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -6998,6 +6998,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSched
                          RegisterClass SrcRC, X86VectorVTInfo DstVT,
                          X86MemOperand x86memop, PatFrag ld_frag, string asm,
                          string mem> {
+let ExeDomain = DstVT.ExeDomain in {
 let hasSideEffects = 0, isCodeGenOnly = 1 in {
   def rr : SI<opc, MRMSrcReg, (outs DstVT.FRC:$dst),
               (ins DstVT.FRC:$src1, SrcRC:$src),
@@ -7023,6 +7024,7 @@ multiclass avx512_vcvtsi<bits<8> opc, SDPatternOperator OpNode, X86FoldableSched
                            (OpNode (DstVT.VT DstVT.RC:$src1),
                                    (ld_frag addr:$src2)))]>,
                   EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+}
   def : InstAlias<"v"#asm#mem#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                   (!cast<Instruction>(NAME#"rr_Int") DstVT.RC:$dst,
                    DstVT.RC:$src1, SrcRC:$src2), 0, "att">;
@@ -7032,6 +7034,7 @@ multiclass avx512_vcvtsi_round<bits<8> opc, SDNode OpNode,
                                X86FoldableSchedWrite sched, RegisterClass SrcRC,
                                X86VectorVTInfo DstVT, string asm,
                                string mem> {
+  let ExeDomain = DstVT.ExeDomain in
   def rrb_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst),
                    (ins DstVT.RC:$src1, SrcRC:$src2, AVX512RC:$rc),
                    !strconcat(asm,
@@ -7145,7 +7148,7 @@ multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT,
                                   SDNode OpNodeRnd,
                                   X86FoldableSchedWrite sched, string asm,
                                   string aliasStr> {
-  let Predicates = [HasAVX512] in {
+  let Predicates = [HasAVX512], ExeDomain = SrcVT.ExeDomain in {
   def rr_Int : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src),
                   !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
                   [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src)))]>,
@@ -7286,7 +7289,7 @@ multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
                             X86VectorVTInfo _DstRC, SDNode OpNode,
                             SDNode OpNodeInt, SDNode OpNodeSAE,
                             X86FoldableSchedWrite sched, string aliasStr>{
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512], ExeDomain = _SrcRC.ExeDomain in {
 let isCodeGenOnly = 1 in {
   def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 7633c3f7709..ed376d4ce96 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -823,7 +823,9 @@ let Constraints = "$src1 = $dst" in {
 multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                        SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
                        string asm, string mem, X86FoldableSchedWrite sched,
+                       Domain d,
                        SchedRead Int2Fpu = ReadDefault> {
+  let ExeDomain = d in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
               !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
               [(set DstRC:$dst, (OpNode SrcRC:$src))]>,
@@ -832,6 +834,7 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
               mem#"\t{$src, $dst|$dst, $src}",
               [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>,
               Sched<[sched.Folded]>;
+  }
 }

 multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
@@ -851,8 +854,8 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
 multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC,
                           RegisterClass DstRC, X86MemOperand x86memop,
                           string asm, string mem,
-                          X86FoldableSchedWrite sched> {
-let hasSideEffects = 0, Predicates = [UseAVX] in {
+                          X86FoldableSchedWrite sched, Domain d> {
+let hasSideEffects = 0, Predicates = [UseAVX], ExeDomain = d in {
   def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
               !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>,
               Sched<[sched, ReadDefault, ReadInt2Fpu]>;
@@ -867,19 +870,19 @@ let hasSideEffects = 0, Predicates = [UseAVX] in {
 let isCodeGenOnly = 1, Predicates = [UseAVX], Uses = [MXCSR],
     mayRaiseFPException = 1 in {
 defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                               "cvttss2si", "cvttss2si",
-                              WriteCvtSS2I>,
+                              WriteCvtSS2I, SSEPackedSingle>,
                               XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                 "cvttss2si", "cvttss2si",
-                                WriteCvtSS2I>,
+                                WriteCvtSS2I, SSEPackedSingle>,
                                 XS, VEX, VEX_W, VEX_LIG;
 defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                               "cvttsd2si", "cvttsd2si",
-                              WriteCvtSD2I>,
+                              WriteCvtSD2I, SSEPackedDouble>,
                               XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                 "cvttsd2si", "cvttsd2si",
-                                WriteCvtSD2I>,
+                                WriteCvtSD2I, SSEPackedDouble>,
                                 XD, VEX, VEX_W, VEX_LIG;
 }
@@ -889,13 +892,17 @@ defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
 // where appropriate to do so.
 let isCodeGenOnly = 1 in {
 defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
-                                WriteCvtI2SS>, XS, VEX_4V, VEX_LIG, SIMD_EXC;
+                                WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+                                VEX_LIG, SIMD_EXC;
 defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
-                                  WriteCvtI2SS>, XS, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
+                                  WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
+                                  VEX_W, VEX_LIG, SIMD_EXC;
 defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
-                                WriteCvtI2SD>, XD, VEX_4V, VEX_LIG;
+                                WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+                                VEX_LIG;
 defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
-                                  WriteCvtI2SD>, XD, VEX_4V, VEX_W, VEX_LIG, SIMD_EXC;
+                                  WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
+                                  VEX_W, VEX_LIG, SIMD_EXC;
 } // isCodeGenOnly = 1

 let Predicates = [UseAVX] in {
@@ -921,28 +928,28 @@ let Predicates = [UseAVX] in {
 let isCodeGenOnly = 1 in {
 defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
                              "cvttss2si", "cvttss2si",
-                             WriteCvtSS2I>, XS, SIMD_EXC;
+                             WriteCvtSS2I, SSEPackedSingle>, XS, SIMD_EXC;
 defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
                                "cvttss2si", "cvttss2si",
-                               WriteCvtSS2I>, XS, REX_W, SIMD_EXC;
+                               WriteCvtSS2I, SSEPackedSingle>, XS, REX_W, SIMD_EXC;
 defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
                              "cvttsd2si", "cvttsd2si",
-                             WriteCvtSD2I>, XD, SIMD_EXC;
+                             WriteCvtSD2I, SSEPackedDouble>, XD, SIMD_EXC;
 defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64,
                                "cvttsd2si", "cvttsd2si",
-                               WriteCvtSD2I>, XD, REX_W, SIMD_EXC;
+                               WriteCvtSD2I, SSEPackedDouble>, XD, REX_W, SIMD_EXC;
 defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
                             "cvtsi2ss", "cvtsi2ss{l}",
-                            WriteCvtI2SS, ReadInt2Fpu>, XS, SIMD_EXC;
+                            WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, SIMD_EXC;
 defm CVTSI642SS : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64,
                               "cvtsi2ss", "cvtsi2ss{q}",
-                              WriteCvtI2SS, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
+                              WriteCvtI2SS, SSEPackedSingle, ReadInt2Fpu>, XS, REX_W, SIMD_EXC;
 defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
                             "cvtsi2sd", "cvtsi2sd{l}",
-                            WriteCvtI2SD, ReadInt2Fpu>, XD;
+                            WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD;
 defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
                               "cvtsi2sd", "cvtsi2sd{q}",
-                              WriteCvtI2SD, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
+                              WriteCvtI2SD, SSEPackedDouble, ReadInt2Fpu>, XD, REX_W, SIMD_EXC;
 } // isCodeGenOnly = 1

 // Conversion Instructions Intrinsics - Match intrinsics which expect MM
@@ -951,7 +958,8 @@ defm CVTSI642SD : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64,
 multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                           ValueType DstVT, ValueType SrcVT, SDNode OpNode,
                           Operand memop, ComplexPattern mem_cpat, string asm,
-                          X86FoldableSchedWrite sched> {
+                          X86FoldableSchedWrite sched, Domain d> {
+let ExeDomain = d in {
   def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src),
                   !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
                   [(set DstRC:$dst, (DstVT (OpNode (SrcVT SrcRC:$src))))]>,
@@ -961,12 +969,13 @@ multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
                   [(set DstRC:$dst, (DstVT (OpNode (SrcVT mem_cpat:$src))))]>,
                   Sched<[sched.Folded]>;
 }
+}

 multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
                                 RegisterClass DstRC, X86MemOperand x86memop,
                                 string asm, string mem, X86FoldableSchedWrite sched,
-                                bit Is2Addr = 1> {
-let hasSideEffects = 0 in {
+                                Domain d, bit Is2Addr = 1> {
+let hasSideEffects = 0, ExeDomain = d in {
   def rr_Int : SI<opc, MRMSrcReg, (outs DstRC:$dst),
                   (ins DstRC:$src1, SrcRC:$src2),
                   !if(Is2Addr, !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
@@ -986,36 +995,46 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
 let Predicates = [UseAVX] in {
 defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
                                 X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
-                                WriteCvtSD2I>, XD, VEX, VEX_LIG;
+                                WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
 defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
                                   X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
-                                  WriteCvtSD2I>, XD, VEX, VEX_W, VEX_LIG;
+                                  WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
 }
 defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
-                               sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD;
+                               sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+                               SSEPackedDouble>, XD;
 defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64, X86cvts2si,
-                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I>, XD, REX_W;
+                                 sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
+                                 SSEPackedDouble>, XD, REX_W;
 }

 let Predicates = [UseAVX] in {
 defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, SIMD_EXC;
+          i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle, 0>,
+          XS, VEX_4V, VEX_LIG, SIMD_EXC;
 defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, 0>, XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+          i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
+          XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
 defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG;
+          i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
+          XD, VEX_4V, VEX_LIG;
 defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, 0>, XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+          i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
+          XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
 }
 let Constraints = "$src1 = $dst" in {
   defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS>, XS, SIMD_EXC;
+                        i32mem, "cvtsi2ss", "l", WriteCvtI2SS, SSEPackedSingle>,
+                        XS, SIMD_EXC;
   defm CVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS>, XS, REX_W, SIMD_EXC;
+                        i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle>,
+                        XS, REX_W, SIMD_EXC;
   defm CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
-                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD>, XD;
+                        i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble>,
+                        XD;
   defm CVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
-                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD>, XD, REX_W, SIMD_EXC;
+                        i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble>,
+                        XD, REX_W, SIMD_EXC;
 }

 def : InstAlias<"vcvtsi2ss{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -1052,32 +1071,34 @@ def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}",
 let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
 defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                  ssmem, sse_load_f32, "cvttss2si",
-                                 WriteCvtSS2I>, XS, VEX, VEX_LIG;
+                                 WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
 defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                    X86cvtts2Int, ssmem, sse_load_f32,
-                                   "cvttss2si", WriteCvtSS2I>,
+                                   "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
                                    XS, VEX, VEX_LIG, VEX_W;
 defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                  sdmem, sse_load_f64, "cvttsd2si",
-                                 WriteCvtSS2I>, XD, VEX, VEX_LIG;
+                                 WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
 defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                    X86cvtts2Int, sdmem, sse_load_f64,
-                                   "cvttsd2si", WriteCvtSS2I>,
+                                   "cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
                                    XD, VEX, VEX_LIG, VEX_W;
 }
 let Uses = [MXCSR], mayRaiseFPException = 1 in {
 defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
                                 ssmem, sse_load_f32, "cvttss2si",
-                                WriteCvtSS2I>, XS;
+                                WriteCvtSS2I, SSEPackedSingle>, XS;
 defm CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
                                   X86cvtts2Int, ssmem, sse_load_f32,
-                                  "cvttss2si", WriteCvtSS2I>, XS, REX_W;
+                                  "cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
+                                  XS, REX_W;
 defm CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
                                 sdmem, sse_load_f64, "cvttsd2si",
-                                WriteCvtSD2I>, XD;
+                                WriteCvtSD2I, SSEPackedDouble>, XD;
 defm CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
                                   X86cvtts2Int, sdmem, sse_load_f64,
-                                  "cvttsd2si", WriteCvtSD2I>, XD, REX_W;
+                                  "cvttsd2si", WriteCvtSD2I, SSEPackedDouble>,
+                                  XD, REX_W;
 }

 def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}",
@@ -1117,18 +1138,18 @@ def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}",
 let Predicates = [UseAVX], Uses = [MXCSR], mayRaiseFPException = 1 in {
 defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                 ssmem, sse_load_f32, "cvtss2si",
-                                WriteCvtSS2I>, XS, VEX, VEX_LIG;
+                                WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
 defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                   ssmem, sse_load_f32, "cvtss2si",
-                                  WriteCvtSS2I>, XS, VEX, VEX_W, VEX_LIG;
+                                  WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
 }
 let Uses = [MXCSR], mayRaiseFPException = 1 in {
 defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
                                ssmem, sse_load_f32, "cvtss2si",
-                               WriteCvtSS2I>, XS;
+                               WriteCvtSS2I, SSEPackedSingle>, XS;
 defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
                                  ssmem, sse_load_f32, "cvtss2si",
-                                 WriteCvtSS2I>, XS, REX_W;
+                                 WriteCvtSS2I, SSEPackedSingle>, XS, REX_W;

 defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
                              "vcvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1817,7 +1838,8 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
                          ValueType vt, X86MemOperand x86memop,
                          PatFrag ld_frag, string OpcodeStr, Domain d,
                          X86FoldableSchedWrite sched = WriteFCom> {
-let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1 in {
+let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
+    ExeDomain = d in {
   def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
              !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
              [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
@@ -1837,7 +1859,7 @@ multiclass sse12_ord_cmp_int<bits<8> opc, RegisterClass RC, SDNode OpNode,
                              ComplexPattern mem_cpat, string OpcodeStr,
                              Domain d,
                              X86FoldableSchedWrite sched = WriteFCom> {
-let Uses = [MXCSR], mayRaiseFPException = 1 in {
+let Uses = [MXCSR], mayRaiseFPException = 1, ExeDomain = d in {
   def rr_Int: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
                  !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
                  [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))]>,
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index 6f724738864..e6b43c07fe0 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -25,25 +25,25 @@ define <8 x double> @sltof864(<8 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm0
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT:    retq
@@ -69,12 +69,12 @@ define <4 x double> @slto4f64(<4 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; NODQ-NEXT:    retq
 ;
@@ -100,7 +100,7 @@ define <2 x double> @slto2f64(<2 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; NODQ-NEXT:    retq
 ;
 ; VLDQ-LABEL: slto2f64:
@@ -140,7 +140,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
 ; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; VLNODQ-NEXT:    vmovq %xmm0, %rax
 ; VLNODQ-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; VLNODQ-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; VLNODQ-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; VLNODQ-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; VLNODQ-NEXT:    retq
 ;
@@ -1040,13 +1040,13 @@ define <16 x float> @slto16f32(<16 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm1
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm2
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
-; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT:    vextractf32x4 $3, %zmm0, %xmm3
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2ss %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
@@ -1094,25 +1094,25 @@ define <8 x double> @slto8f64(<8 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm3
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm0
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; NODQ-NEXT:    retq
@@ -1138,25 +1138,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm3
 ; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm4, %xmm4
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; NODQ-NEXT:    vextracti128 $1, %ymm0, %xmm3
 ; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vmovq %xmm0, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm0
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm4[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm0
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; NODQ-NEXT:    vextracti32x4 $3, %zmm1, %xmm2
@@ -1164,25 +1164,25 @@ define <16 x double> @slto16f64(<16 x i64> %a) {
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm2
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
 ; NODQ-NEXT:    vextracti32x4 $2, %zmm1, %xmm3
 ; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
 ; NODQ-NEXT:    vextracti128 $1, %ymm1, %xmm3
 ; NODQ-NEXT:    vpextrq $1, %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm3
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0]
 ; NODQ-NEXT:    vpextrq $1, %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vmovq %xmm1, %rax
 ; NODQ-NEXT:    vcvtsi2sd %rax, %xmm5, %xmm1
-; NODQ-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; NODQ-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm4[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm3, %ymm1, %ymm1
 ; NODQ-NEXT:    vinsertf64x4 $1, %ymm2, %zmm1, %zmm1
 ; NODQ-NEXT:    retq
@@ -1275,13 +1275,13 @@ define <16 x float> @ulto16f32(<16 x i64> %a) {
 ; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm1
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0]
 ; NODQ-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; NODQ-NEXT:    vextracti32x4 $2, %zmm0, %xmm2
+; NODQ-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
 ; NODQ-NEXT:    vpextrq $1, %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm3
 ; NODQ-NEXT:    vmovq %xmm2, %rax
 ; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm2
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
-; NODQ-NEXT:    vextracti32x4 $3, %zmm0, %xmm3
+; NODQ-NEXT:    vextractf32x4 $3, %zmm0, %xmm3
 ; NODQ-NEXT:    vmovq %xmm3, %rax
 ; NODQ-NEXT:    vcvtusi2ss %rax, %xmm5, %xmm4
 ; NODQ-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0],xmm2[3]
diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll
index 448c21d93ac..92118100bba 100644
--- a/llvm/test/CodeGen/X86/ftrunc.ll
+++ b/llvm/test/CodeGen/X86/ftrunc.ll
@@ -289,12 +289,12 @@ define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 {
 ; SSE2-LABEL: trunc_signed_v2f64:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    cvttsd2si %xmm0, %rax
-; SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT:    cvttsd2si %xmm0, %rcx
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rcx, %xmm1
-; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: trunc_signed_v2f64:
@@ -315,20 +315,20 @@ define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 {
 ; SSE2-LABEL: trunc_signed_v4f64:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    cvttsd2si %xmm1, %rax
-; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE2-NEXT:    cvttsd2si %xmm1, %rcx
 ; SSE2-NEXT:    cvttsd2si %xmm0, %rdx
-; SSE2-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE2-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE2-NEXT:    cvttsd2si %xmm0, %rsi
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rdx, %xmm0
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rsi, %xmm1
-; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rcx, %xmm2
-; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: trunc_signed_v4f64:
diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
index 8d43a1b7323..980956bdaa8 100644
--- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
+++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll
@@ -321,7 +321,7 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" {
 ;
 ; ALL-LABEL: test_zext_cmp11:
 ; ALL:       # %bb.0: # %entry
-; ALL-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; ALL-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; ALL-NEXT:    vucomisd %xmm2, %xmm0
 ; ALL-NEXT:    sete %al
 ; ALL-NEXT:    vucomisd %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/pr42905.ll b/llvm/test/CodeGen/X86/pr42905.ll
index bb51aced225..310a173f824 100644
--- a/llvm/test/CodeGen/X86/pr42905.ll
+++ b/llvm/test/CodeGen/X86/pr42905.ll
@@ -11,7 +11,7 @@ define <4 x double> @autogen_SD30452(i1 %L230) {
 ; CHECK-NEXT:    movq %xmm2, %rax
 ; CHECK-NEXT:    xorps %xmm2, %xmm2
 ; CHECK-NEXT:    cvtsi2sd %rax, %xmm2
-; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; CHECK-NEXT:    cvtdq2pd %xmm1, %xmm1
 ; CHECK-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/sqrt-partial.ll b/llvm/test/CodeGen/X86/sqrt-partial.ll
index 7ed68c10849..48914d8ed44 100644
--- a/llvm/test/CodeGen/X86/sqrt-partial.ll
+++ b/llvm/test/CodeGen/X86/sqrt-partial.ll
@@ -38,7 +38,7 @@ define float @f(float %val) nounwind {
 define double @d(double %val) nounwind {
 ; SSE-LABEL: d:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    xorps %xmm1, %xmm1
+; SSE-NEXT:    xorpd %xmm1, %xmm1
 ; SSE-NEXT:    ucomisd %xmm1, %xmm0
 ; SSE-NEXT:    jb .LBB1_2
 ; SSE-NEXT:  # %bb.1: # %.split
@@ -49,7 +49,7 @@ define double @d(double %val) nounwind {
 ;
 ; AVX-LABEL: d:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vucomisd %xmm1, %xmm0
 ; AVX-NEXT:    jb .LBB1_2
 ; AVX-NEXT:  # %bb.1: # %.split
diff --git a/llvm/test/CodeGen/X86/undef-label.ll b/llvm/test/CodeGen/X86/undef-label.ll
index b4be383d55d..56e0ca907f8 100644
--- a/llvm/test/CodeGen/X86/undef-label.ll
+++ b/llvm/test/CodeGen/X86/undef-label.ll
@@ -11,7 +11,7 @@ define void @xyz() {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movl $g, %eax
 ; CHECK-NEXT:    movq %rax, %xmm0
-; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    xorpd %xmm1, %xmm1
 ; CHECK-NEXT:    ucomisd %xmm1, %xmm0
 ; CHECK-NEXT:    jne .LBB0_1
 ; CHECK-NEXT:    jnp .LBB0_2
diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
index fc3233327a5..bf2ea5e067c 100644
--- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll
+++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll
@@ -21,7 +21,7 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movq %rax, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movq %rax, %xmm0
 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
@@ -125,13 +125,13 @@ define <4 x i64> @fptosi_4f64_to_4i64(<4 x double> %a) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movq %rax, %xmm2
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movq %rax, %xmm0
 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE-NEXT:    cvttsd2si %xmm1, %rax
 ; SSE-NEXT:    movq %rax, %xmm3
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm1, %rax
 ; SSE-NEXT:    movq %rax, %xmm0
 ; SSE-NEXT:    punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
@@ -335,7 +335,7 @@ define <4 x i32> @fptoui_2f64_to_4i32(<2 x double> %a) {
 ; SSE-LABEL: fptoui_2f64_to_4i32:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rcx
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    movd %ecx, %xmm1
@@ -409,7 +409,7 @@ define <4 x i32> @fptoui_2f64_to_2i32(<2 x double> %a) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -482,7 +482,7 @@ define <4 x i32> @fptoui_4f64_to_2i32(<2 x double> %a) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
@@ -734,13 +734,13 @@ define <4 x i32> @fptoui_4f64_to_4i32(<4 x double> %a) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    cvttsd2si %xmm1, %rax
 ; SSE-NEXT:    movd %eax, %xmm2
-; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm1[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm1, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm1
-; SSE-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
+; SSE-NEXT:    unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT:    cvttsd2si %xmm0, %rax
 ; SSE-NEXT:    movd %eax, %xmm0
 ; SSE-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
index 269879e7f1a..1d0106b75a8 100644
--- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll
@@ -27,8 +27,8 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
-; SSE2-NEXT:    movaps %xmm1, %xmm0
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT:    movapd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_2i64_to_2f64:
@@ -38,7 +38,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE41-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_2i64_to_2f64:
@@ -47,7 +47,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; VEX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_2i64_to_2f64:
@@ -56,7 +56,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_2i64_to_2f64:
@@ -65,7 +65,7 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_2i64_to_2f64:
@@ -237,16 +237,16 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE2-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm3
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE2-NEXT:    movlhps {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT:    movaps %xmm2, %xmm0
-; SSE2-NEXT:    movaps %xmm3, %xmm1
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; SSE2-NEXT:    movapd %xmm2, %xmm0
+; SSE2-NEXT:    movapd %xmm3, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_4i64_to_4f64:
@@ -256,14 +256,14 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE41-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm2
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm1
-; SSE41-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: sitofp_4i64_to_4f64:
@@ -273,12 +273,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX1-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovq %xmm1, %rax
 ; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX1-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX1-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX1-NEXT:    vmovq %xmm0, %rax
 ; AVX1-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -289,12 +289,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX2-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX2-NEXT:    vmovq %xmm1, %rax
 ; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX2-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX2-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX2-NEXT:    vmovq %xmm0, %rax
 ; AVX2-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX2-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
@@ -305,12 +305,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
@@ -321,12 +321,12 @@ define <4 x double> @sitofp_4i64_to_4f64(<4 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -1204,7 +1204,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -1235,7 +1235,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
 ; SSE2-NEXT:    retq
 ;
@@ -1274,7 +1274,7 @@ define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -1304,7 +1304,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE2-NEXT:    movq %xmm0, %rax
 ; SSE2-NEXT:    xorps %xmm0, %xmm0
 ; SSE2-NEXT:    cvtsi2ss %rax, %xmm0
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm1[0],zero
 ; SSE2-NEXT:    retq
 ;
@@ -1342,7 +1342,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -1927,7 +1927,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -2074,7 +2074,7 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -2216,7 +2216,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtusi2ss %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; AVX512VL-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
 ; AVX512VL-NEXT:    retq
 ;
@@ -3023,7 +3023,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
-; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_load_2i64_to_2f64:
@@ -3034,7 +3034,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE41-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_load_2i64_to_2f64:
@@ -3044,7 +3044,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; VEX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_load_2i64_to_2f64:
@@ -3054,7 +3054,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_load_2i64_to_2f64:
@@ -3064,7 +3064,7 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) {
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm1, %xmm1
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm0
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64:
@@ -3220,7 +3220,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
 ; SSE2-NEXT:    movq %xmm1, %rax
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
-; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    xorps %xmm1, %xmm1
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm1
@@ -3228,7 +3228,7 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
 ; SSE2-NEXT:    movq %xmm2, %rax
 ; SSE2-NEXT:    xorps %xmm2, %xmm2
 ; SSE2-NEXT:    cvtsi2sd %rax, %xmm2
-; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE2-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: sitofp_load_4i64_to_4f64:
@@ -3240,64 +3240,64 @@ define <4 x double> @sitofp_load_4i64_to_4f64(<4 x i64> *%a) {
 ; SSE41-NEXT:    movq %xmm0, %rax
 ; SSE41-NEXT:    xorps %xmm0, %xmm0
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm0
-; SSE41-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; SSE41-NEXT:    pextrq $1, %xmm1, %rax
 ; SSE41-NEXT:    xorps %xmm2, %xmm2
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm2
 ; SSE41-NEXT:    movq %xmm1, %rax
 ; SSE41-NEXT:    xorps %xmm1, %xmm1
 ; SSE41-NEXT:    cvtsi2sd %rax, %xmm1
-; SSE41-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE41-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; SSE41-NEXT:    retq
 ;
 ; VEX-LABEL: sitofp_load_4i64_to_4f64:
 ; VEX:       # %bb.0:
-; VEX-NEXT:    vmovdqa (%rdi), %xmm0
+; VEX-NEXT:    vmovapd (%rdi), %xmm0
 ; VEX-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; VEX-NEXT:    vpextrq $1, %xmm1, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; VEX-NEXT:    vmovq %xmm1, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; VEX-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; VEX-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; VEX-NEXT:    vpextrq $1, %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; VEX-NEXT:    vmovq %xmm0, %rax
 ; VEX-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; VEX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; VEX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; VEX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; VEX-NEXT:    retq
 ;
 ; AVX512F-LABEL: sitofp_load_4i64_to_4f64:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT:    vmovapd (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vpextrq $1, %xmm1, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm1, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512F-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512F-NEXT:    vmovq %xmm0, %rax
 ; AVX512F-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512F-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512F-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: sitofp_load_4i64_to_4f64:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT:    vmovapd (%rdi), %xmm0
 ; AVX512VL-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512VL-NEXT:    vpextrq $1, %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm2, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm1, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm1
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
 ; AVX512VL-NEXT:    vpextrq $1, %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm2
 ; AVX512VL-NEXT:    vmovq %xmm0, %rax
 ; AVX512VL-NEXT:    vcvtsi2sd %rax, %xmm3, %xmm0
-; AVX512VL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX512VL-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -4288,7 +4288,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; VEX-LABEL: sitofp_load_8i64_to_8f32:
 ; VEX:       # %bb.0:
-; VEX-NEXT:    vmovdqa (%rdi), %xmm0
+; VEX-NEXT:    vmovaps (%rdi), %xmm0
 ; VEX-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; VEX-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; VEX-NEXT:    vmovdqa 48(%rdi), %xmm3
@@ -4319,7 +4319,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; AVX512F-LABEL: sitofp_load_8i64_to_8f32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX512F-NEXT:    vmovdqa 48(%rdi), %xmm3
@@ -4350,7 +4350,7 @@ define <8 x float> @sitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; AVX512VL-LABEL: sitofp_load_8i64_to_8f32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm0
 ; AVX512VL-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512VL-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX512VL-NEXT:    vmovdqa 48(%rdi), %xmm3
@@ -4648,7 +4648,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) {
 ; VEX-LABEL: uitofp_load_4i64_to_4f32:
 ; VEX:       # %bb.0:
 ; VEX-NEXT:    vmovdqa (%rdi), %xmm2
-; VEX-NEXT:    vmovdqa 16(%rdi), %xmm0
+; VEX-NEXT:    vmovaps 16(%rdi), %xmm0
 ; VEX-NEXT:    vpextrq $1, %xmm2, %rax
 ; VEX-NEXT:    testq %rax, %rax
 ; VEX-NEXT:    js .LBB81_1
@@ -5167,7 +5167,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ; VEX-LABEL: uitofp_load_8i64_to_8f32:
 ; VEX:       # %bb.0:
 ; VEX-NEXT:    vmovdqa (%rdi), %xmm1
-; VEX-NEXT:    vmovdqa 16(%rdi), %xmm0
+; VEX-NEXT:    vmovaps 16(%rdi), %xmm0
 ; VEX-NEXT:    vmovdqa 32(%rdi), %xmm4
 ; VEX-NEXT:    vmovdqa 48(%rdi), %xmm3
 ; VEX-NEXT:    vpextrq $1, %xmm4, %rax
@@ -5293,7 +5293,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; AVX512F-LABEL: uitofp_load_8i64_to_8f32:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512F-NEXT:    vmovaps (%rdi), %xmm0
 ; AVX512F-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512F-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX512F-NEXT:    vmovdqa 48(%rdi), %xmm3
@@ -5324,7 +5324,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) {
 ;
 ; AVX512VL-LABEL: uitofp_load_8i64_to_8f32:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa (%rdi), %xmm0
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm0
 ; AVX512VL-NEXT:    vmovdqa 16(%rdi), %xmm1
 ; AVX512VL-NEXT:    vmovdqa 32(%rdi), %xmm2
 ; AVX512VL-NEXT:    vmovdqa 48(%rdi), %xmm3