diff options
-rw-r--r-- | llvm/include/llvm/IR/IntrinsicsAArch64.td | 6 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 8 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-fp.ll | 104 |
3 files changed, 114 insertions, 4 deletions
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index d2a3bec2530..66e03702135 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -851,6 +851,12 @@ def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic; def int_aarch64_sve_fcvtzs_i32f16 : Builtin_SVCVT<"svcvt_s32_f16_m", llvm_nxv4i32_ty, llvm_nxv8f16_ty>; // +// Floating-point arithmetic +// +def int_aarch64_sve_frecps_x: AdvSIMD_2VectorArg_Intrinsic; +def int_aarch64_sve_frsqrts_x: AdvSIMD_2VectorArg_Intrinsic; + +// // Predicate operations // diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 379640eb5d3..af663f378d2 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -139,11 +139,11 @@ let Predicates = [HasSVE] in { defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">; defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>; - defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", null_frag>; - defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", null_frag>; + defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>; + defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>; defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>; - defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", null_frag>; - defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", null_frag>; + defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>; + defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>; defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">; diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll index 08913858886..4af45c198b0 100644 --- a/llvm/test/CodeGen/AArch64/sve-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp.ll @@ -23,3 +23,107 @@ define <vscale x 2 x double> @fadd_d(<vscale x 2 x double> %a, <vscale x 2 x dou %res = fadd <vscale x 2 x double> %a, %b ret <vscale x 2 x double> %res } + +define <vscale x 8 x half> @fsub_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: fsub_h: +; CHECK: fsub z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = fsub <vscale x 8 x half> %a, %b + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @fsub_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: fsub_s: +; CHECK: fsub z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fsub <vscale x 4 x float> %a, %b + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @fsub_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: fsub_d: +; CHECK: fsub z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %res = fsub <vscale x 2 x double> %a, %b + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: fmul_h: +; CHECK: fmul z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = fmul <vscale x 8 x half> %a, %b + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: fmul_s: +; CHECK: fmul z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = fmul <vscale x 4 x float> %a, %b + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: fmul_d: +; CHECK: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %res = fmul <vscale x 2 x double> %a, %b + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @frecps_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: frecps_h: +; CHECK: frecps z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @frecps_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: frecps_s: +; CHECK: frecps z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @frecps_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: frecps_d: +; CHECK: frecps z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %res +} + +define <vscale x 8 x half> @frsqrts_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b) { +; CHECK-LABEL: frsqrts_h: +; CHECK: frsqrts z0.h, z0.h, z1.h +; CHECK-NEXT: ret + %res = call <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %res +} + +define <vscale x 4 x float> @frsqrts_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b) { +; CHECK-LABEL: frsqrts_s: +; CHECK: frsqrts z0.s, z0.s, z1.s +; CHECK-NEXT: ret + %res = call <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %res +} + +define <vscale x 2 x double> @frsqrts_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b) { +; CHECK-LABEL: frsqrts_d: +; CHECK: frsqrts z0.d, z0.d, z1.d +; CHECK-NEXT: ret + %res = call <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %res +} + +declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.frsqrts.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.frsqrts.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.frsqrts.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>) |