diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsX86.td | 10 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 30 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 8 |
3 files changed, 17 insertions, 31 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index 5de950813cd..018aa9a87d0 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -2955,10 +2955,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">, - Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>; - def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">, - Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">, + Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, + llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">, + Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, + llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index d2894088b80..92d536356d4 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3629,7 +3629,6 @@ def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src), (VRCP28PDZrb VR512:$src)>; multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, - Intrinsic V16F32Int, Intrinsic V8F64Int, OpndItins itins_s, OpndItins itins_d> { def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), @@ -3655,27 +3654,6 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode, (v8f64 (bitconvert (memopv16f32 addr:$src)))))], itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; -let isCodeGenOnly = 1 in { - def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), - !strconcat(OpcodeStr, - "ps\t{$src, $dst|$dst, $src}"), - [(set VR512:$dst, (V16F32Int VR512:$src))]>, - EVEX, EVEX_V512; - def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), - !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), - [(set VR512:$dst, - (V16F32Int (memopv16f32 addr:$src)))]>, EVEX, - EVEX_V512, EVEX_CD8<32, CD8VF>; - def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src), - !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), - [(set VR512:$dst, (V8F64Int VR512:$src))]>, - EVEX, EVEX_V512, VEX_W; - def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src), - !strconcat(OpcodeStr, - "pd\t{$src, $dst|$dst, $src}"), - [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>, - EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -} // isCodeGenOnly = 1 } multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, @@ -3744,10 +3722,16 @@ defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt", int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd, SSE_SQRTSS, SSE_SQRTSD>, avx512_sqrt_packed<0x51, "vsqrt", fsqrt, - int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512, SSE_SQRTPS, SSE_SQRTPD>; let Predicates = [HasAVX512] in { + def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1), + (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)), + (VSQRTPSZrr VR512:$src1)>; + def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1), + (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)), + (VSQRTPDZrr VR512:$src1)>; + def : Pat<(f32 (fsqrt FR32X:$src)), (VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>; def : Pat<(f32 (fsqrt (load addr:$src))), diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 18cfcfe78b0..6f34d4596f9 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -134,17 +134,17 @@ declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x flo define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK: vsqrtpd - %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1] + %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1] ret <8 x double> %res } -declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone +declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { ; CHECK: vsqrtps - %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1] + %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1] ret <16 x float> %res } -declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone +declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) { ; CHECK: vsqrtss {{.*}}encoding: [0x62 |