summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td10
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td30
-rw-r--r--llvm/test/CodeGen/X86/avx512-intrinsics.ll8
3 files changed, 17 insertions, 31 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5de950813cd..018aa9a87d0 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -2955,10 +2955,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty],
[IntrNoMem]>;
- def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty], [IntrNoMem]>;
- def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty], [IntrNoMem]>;
+ def int_x86_avx512_sqrt_pd_512 : GCCBuiltin<"__builtin_ia32_sqrtpd512_mask">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
+ llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_sqrt_ps_512 : GCCBuiltin<"__builtin_ia32_sqrtps512_mask">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
+ llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d2894088b80..92d536356d4 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3629,7 +3629,6 @@ def : Pat <(v8f64 (int_x86_avx512_rcp28_pd (v8f64 VR512:$src),
(VRCP28PDZrb VR512:$src)>;
multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Intrinsic V16F32Int, Intrinsic V8F64Int,
OpndItins itins_s, OpndItins itins_d> {
def PSZrr :AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
!strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
@@ -3655,27 +3654,6 @@ multiclass avx512_sqrt_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
(v8f64 (bitconvert (memopv16f32 addr:$src)))))],
itins_d.rm>, EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>;
-let isCodeGenOnly = 1 in {
- def PSZr_Int : AVX512PSI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr,
- "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V16F32Int VR512:$src))]>,
- EVEX, EVEX_V512;
- def PSZm_Int : AVX512PSI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst,
- (V16F32Int (memopv16f32 addr:$src)))]>, EVEX,
- EVEX_V512, EVEX_CD8<32, CD8VF>;
- def PDZr_Int : AVX512PDI<opc, MRMSrcReg, (outs VR512:$dst), (ins VR512:$src),
- !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int VR512:$src))]>,
- EVEX, EVEX_V512, VEX_W;
- def PDZm_Int : AVX512PDI<opc, MRMSrcMem, (outs VR512:$dst), (ins f512mem:$src),
- !strconcat(OpcodeStr,
- "pd\t{$src, $dst|$dst, $src}"),
- [(set VR512:$dst, (V8F64Int (memopv8f64 addr:$src)))]>,
- EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
-} // isCodeGenOnly = 1
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr,
@@ -3744,10 +3722,16 @@ defm VSQRT : avx512_sqrt_scalar<0x51, "sqrt",
int_x86_avx512_sqrt_ss, int_x86_avx512_sqrt_sd,
SSE_SQRTSS, SSE_SQRTSD>,
avx512_sqrt_packed<0x51, "vsqrt", fsqrt,
- int_x86_avx512_sqrt_ps_512, int_x86_avx512_sqrt_pd_512,
SSE_SQRTPS, SSE_SQRTPD>;
let Predicates = [HasAVX512] in {
+ def : Pat<(v16f32 (int_x86_avx512_sqrt_ps_512 (v16f32 VR512:$src1),
+ (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), FROUND_CURRENT)),
+ (VSQRTPSZrr VR512:$src1)>;
+ def : Pat<(v8f64 (int_x86_avx512_sqrt_pd_512 (v8f64 VR512:$src1),
+ (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1), FROUND_CURRENT)),
+ (VSQRTPDZrr VR512:$src1)>;
+
def : Pat<(f32 (fsqrt FR32X:$src)),
(VSQRTSSZr (f32 (IMPLICIT_DEF)), FR32X:$src)>;
def : Pat<(f32 (fsqrt (load addr:$src))),
diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
index 18cfcfe78b0..6f34d4596f9 100644
--- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll
@@ -134,17 +134,17 @@ declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x flo
define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
; CHECK: vsqrtpd
- %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0) ; <<8 x double>> [#uses=1]
+ %res = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ; <<8 x double>> [#uses=1]
ret <8 x double> %res
}
-declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>) nounwind readnone
+declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
; CHECK: vsqrtps
- %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0) ; <<16 x float>> [#uses=1]
+ %res = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ; <<16 x float>> [#uses=1]
ret <16 x float> %res
}
-declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>) nounwind readnone
+declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1) {
; CHECK: vsqrtss {{.*}}encoding: [0x62
OpenPOWER on IntegriCloud