diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 26 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 76 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 16 | ||||
-rw-r--r-- | llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 150 |
8 files changed, 236 insertions, 50 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 7c8172f1a06..5ac45b7dbcd 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -5098,10 +5098,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Conversion ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_avx512_cvtss2usi : GCCBuiltin<"__builtin_ia32_cvtss2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>; - def int_x86_avx512_cvtss2usi64 : GCCBuiltin<"__builtin_ia32_cvtss2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2si : GCCBuiltin<"__builtin_ia32_vcvttss2si32">, Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttss2si64 : GCCBuiltin<"__builtin_ia32_vcvttss2si64">, @@ -5116,11 +5112,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtusi642ss : GCCBuiltin<"__builtin_ia32_cvtusi2ss64">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - - def int_x86_avx512_cvtsd2usi : GCCBuiltin<"__builtin_ia32_cvtsd2usi">, - Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_cvtsd2usi64 : GCCBuiltin<"__builtin_ia32_cvtsd2usi64">, - Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2si : GCCBuiltin<"__builtin_ia32_vcvttsd2si32">, Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_vcvttsd2si64">, @@ -5135,7 +5126,22 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_cvtusi642sd : GCCBuiltin<"__builtin_ia32_cvtusi2sd64">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>; - + def int_x86_avx512_vcvtss2usi32 : GCCBuiltin<"__builtin_ia32_vcvtss2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtss2usi64 : GCCBuiltin<"__builtin_ia32_vcvtss2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtss2si32 : GCCBuiltin<"__builtin_ia32_vcvtss2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtss2si64 : GCCBuiltin<"__builtin_ia32_vcvtss2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtsd2usi32 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtsd2usi64 : GCCBuiltin<"__builtin_ia32_vcvtsd2usi64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtsd2si32 : GCCBuiltin<"__builtin_ia32_vcvtsd2si32">, + Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_vcvtsd2si64 : GCCBuiltin<"__builtin_ia32_vcvtsd2si64">, + Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_cvtsi2ss32 : GCCBuiltin<"__builtin_ia32_cvtsi2ss32">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 99c41adfc75..012751529bf 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21148,6 +21148,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS"; case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS"; case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT"; + case X86ISD::SCALAR_FP_TO_SINT_RND: return "X86ISD::SCALAR_FP_TO_SINT_RND"; + case X86ISD::SCALAR_FP_TO_UINT_RND: return "X86ISD::SCALAR_FP_TO_UINT_RND"; } return nullptr; } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index f6f8bbe46ab..ee6e87314fa 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -470,6 +470,8 @@ namespace llvm { // Vector float/double to signed/unsigned integer. FP_TO_SINT_RND, FP_TO_UINT_RND, + // Scalar float/double to signed/unsigned integer. + SCALAR_FP_TO_SINT_RND, SCALAR_FP_TO_UINT_RND, // Save xmm argument registers to the stack, according to %al. An operator // is needed so that this can be expanded with control flow. VASTART_SAVE_XMM_REGS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index a7becf3ec7d..a7a5a129f81 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4945,54 +4945,66 @@ def : Pat<(f64 (uint_to_fp GR64:$src)), //===----------------------------------------------------------------------===// // AVX-512 Scalar convert from float/double to integer //===----------------------------------------------------------------------===// -multiclass avx512_cvt_s_int_round<bits<8> opc, RegisterClass SrcRC, - RegisterClass DstRC, Intrinsic Int, - Operand memop, ComplexPattern mem_cpat, string asm> { +multiclass avx512_cvt_s_int_round<bits<8> opc, X86VectorVTInfo SrcVT , + X86VectorVTInfo DstVT, SDNode OpNode, string asm> { let hasSideEffects = 0, Predicates = [HasAVX512] in { - def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), + def rr : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src), !strconcat(asm,"\t{$src, $dst|$dst, $src}"), - [(set DstRC:$dst, (Int SrcRC:$src))]>, EVEX, VEX_LIG; - def rb : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src, AVX512RC:$rc), - !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), []>, + [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 FROUND_CURRENT)))]>, + EVEX, VEX_LIG; + def rb : SI<opc, MRMSrcReg, (outs DstVT.RC:$dst), (ins SrcVT.RC:$src, AVX512RC:$rc), + !strconcat(asm,"\t{$rc, $src, $dst|$dst, $src, $rc}"), + [(set DstVT.RC:$dst, (OpNode (SrcVT.VT SrcVT.RC:$src),(i32 imm:$rc)))]>, EVEX, VEX_LIG, EVEX_B, EVEX_RC; let mayLoad = 1 in - def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), - !strconcat(asm,"\t{$src, $dst|$dst, $src}"), []>, EVEX, VEX_LIG; - } // hasSideEffects = 0, Predicates = [HasAVX512] + def rm : SI<opc, MRMSrcMem, (outs DstVT.RC:$dst), (ins SrcVT.ScalarMemOp:$src), + !strconcat(asm,"\t{$src, $dst|$dst, $src}"), + [(set DstVT.RC:$dst, (OpNode + (SrcVT.VT (scalar_to_vector (SrcVT.ScalarLdFrag addr:$src))), + (i32 FROUND_CURRENT)))]>, + EVEX, VEX_LIG; + } // hasSideEffects = 0, Predicates = [HasAVX512] } // Convert float/double to signed/unsigned int 32/64 -defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse_cvtss2si, - ssmem, sse_load_f32, "cvtss2si">, +defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info, + X86cvtss2si, "cvtss2si">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, - int_x86_sse_cvtss2si64, - ssmem, sse_load_f32, "cvtss2si">, +defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, + X86cvtss2si, "cvtss2si">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, - int_x86_avx512_cvtss2usi, - ssmem, sse_load_f32, "cvtss2usi">, +defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, + X86cvtss2usi, "cvtss2usi">, XS, EVEX_CD8<32, CD8VT1>; -defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, - int_x86_avx512_cvtss2usi64, ssmem, - sse_load_f32, "cvtss2usi">, XS, VEX_W, +defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, + X86cvtss2usi, "cvtss2usi">, XS, VEX_W, EVEX_CD8<32, CD8VT1>; -defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, VR128X, GR32, int_x86_sse2_cvtsd2si, - sdmem, sse_load_f64, "cvtsd2si">, +defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, + X86cvtsd2si, "cvtsd2si">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, VR128X, GR64, - int_x86_sse2_cvtsd2si64, - sdmem, sse_load_f64, "cvtsd2si">, +defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, + X86cvtsd2si, "cvtsd2si">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, VR128X, GR32, - int_x86_avx512_cvtsd2usi, - sdmem, sse_load_f64, "cvtsd2usi">, +defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, + X86cvtsd2usi, "cvtsd2usi">, XD, EVEX_CD8<64, CD8VT1>; -defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, VR128X, GR64, - int_x86_avx512_cvtsd2usi64, sdmem, - sse_load_f64, "cvtsd2usi">, XD, VEX_W, +defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, + X86cvtsd2usi, "cvtsd2usi">, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +// The SSE version of these instructions are disabled for AVX512. +// Therefore, the SSE intrinsics are mapped to the AVX512 instructions. +let Predicates = [HasAVX512] in { + def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))), + (VCVTSS2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))), + (VCVTSS2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>; + def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))), + (VCVTSD2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>; + def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))), + (VCVTSD2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>; +} // HasAVX512 + let isCodeGenOnly = 1 , Predicates = [HasAVX512] in { defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X, int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 9ad49f21bb1..3f50bd063fb 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -542,6 +542,12 @@ def X86cvttss2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd> def X86cvttss2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>; def X86cvttsd2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSDoubleToIntRnd>; def X86cvttsd2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSDoubleToIntRnd>; + +def X86cvtsd2si : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSDoubleToIntRnd>; +def X86cvtsd2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSDoubleToIntRnd>; +def X86cvtss2si : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSFloatToIntRnd>; +def X86cvtss2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSFloatToIntRnd>; + // Vector with rounding mode // cvtt fp-to-int staff diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 5c670082104..689b42a4ac9 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -2135,6 +2135,22 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0), X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI), + X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_SINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_SINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_UINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_UINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_SINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_SINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_UINT_RND, 0), + X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, + X86ISD::SCALAR_FP_TO_UINT_RND, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0), diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index dda01d1a572..b26a4b55505 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -2275,10 +2275,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { case llvm::Intrinsic::bswap: handleBswap(I); break; - case llvm::Intrinsic::x86_avx512_cvtsd2usi64: - case llvm::Intrinsic::x86_avx512_cvtsd2usi: - case llvm::Intrinsic::x86_avx512_cvtss2usi64: - case llvm::Intrinsic::x86_avx512_cvtss2usi: + case llvm::Intrinsic::x86_avx512_vcvtsd2usi64: + case llvm::Intrinsic::x86_avx512_vcvtsd2usi32: + case llvm::Intrinsic::x86_avx512_vcvtss2usi64: + case llvm::Intrinsic::x86_avx512_vcvtss2usi32: case llvm::Intrinsic::x86_avx512_cvttss2usi64: case llvm::Intrinsic::x86_avx512_cvttss2usi: case llvm::Intrinsic::x86_avx512_cvttsd2usi64: diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 756a3b5e510..34674f5bc4e 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -424,12 +424,154 @@ declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32) nounwind readnone define i64 @test_x86_avx512_cvtsd2usi64(<2 x double> %a0) { ; CHECK-LABEL: test_x86_avx512_cvtsd2usi64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsd2usi %xmm0, %rax +; CHECK-NEXT: vcvtsd2usi %xmm0, %rcx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %rdx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax ; CHECK-NEXT: retq - %res = call i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double> %a0) ; <i64> [#uses=1] - ret i64 %res + + %res = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 4) + %res1 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 3) + %res2 = call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %a0, i32 1) + %res3 = add i64 %res, %res1 + %res4 = add i64 %res3, %res2 + ret i64 %res4 +} +declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32) nounwind readnone + +define i64 @test_x86_avx512_cvtsd2si64(<2 x double> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtsd2si64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsd2si %xmm0, %rcx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %rdx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: retq + + %res = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 4) + %res1 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 3) + %res2 = call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %a0, i32 1) + %res3 = add i64 %res, %res1 + %res4 = add i64 %res3, %res2 + ret i64 %res4 +} +declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32) nounwind readnone + +define i64 @test_x86_avx512_cvtss2usi64(<4 x float> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtss2usi64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtss2usi %xmm0, %rcx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %rdx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: retq + + %res = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 4) + %res1 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 3) + %res2 = call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %a0, i32 1) + %res3 = add i64 %res, %res1 + %res4 = add i64 %res3, %res2 + ret i64 %res4 +} +declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32) nounwind readnone + +define i64 @test_x86_avx512_cvtss2si64(<4 x float> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtss2si64: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtss2si %xmm0, %rcx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %rax +; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %rdx +; CHECK-NEXT: addq %rcx, %rax +; CHECK-NEXT: addq %rdx, %rax +; CHECK-NEXT: retq + + %res = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 4) + %res1 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 3) + %res2 = call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %a0, i32 1) + %res3 = add i64 %res, %res1 + %res4 = add i64 %res3, %res2 + ret i64 %res4 +} +declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtsd2usi32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsd2usi %xmm0, %ecx +; CHECK-NEXT: vcvtsd2usi {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2usi {rd-sae}, %xmm0, %edx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retq + + %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 3) + %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 1) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtsd2si32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtsd2si %xmm0, %ecx +; CHECK-NEXT: vcvtsd2si {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtsd2si {rd-sae}, %xmm0, %edx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retq + + %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 3) + %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 1) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtss2usi32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtss2usi %xmm0, %ecx +; CHECK-NEXT: vcvtss2usi {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2usi {rd-sae}, %xmm0, %edx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retq + + %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 3) + %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 1) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 +} +declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone + +define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) { +; CHECK-LABEL: test_x86_avx512_cvtss2si32: +; CHECK: ## BB#0: +; CHECK-NEXT: vcvtss2si %xmm0, %ecx +; CHECK-NEXT: vcvtss2si {rz-sae}, %xmm0, %eax +; CHECK-NEXT: vcvtss2si {rd-sae}, %xmm0, %edx +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: addl %edx, %eax +; CHECK-NEXT: retq + + %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4) + %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 3) + %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 1) + %res3 = add i32 %res, %res1 + %res4 = add i32 %res3, %res2 + ret i32 %res4 } -declare i64 @llvm.x86.avx512.cvtsd2usi64(<2 x double>) nounwind readnone +declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone define <16 x float> @test_x86_vcvtph2ps_512(<16 x i16> %a0) { ; CHECK-LABEL: test_x86_vcvtph2ps_512: |