diff options
-rw-r--r-- | clang/include/clang/Basic/arm_neon.td | 10 |
-rw-r--r-- | clang/test/CodeGen/aarch64-neon-fma.c | 29 |
2 files changed, 36 insertions, 3 deletions
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index b1ae1db0f6e..f1b07b70ee0 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -623,10 +623,14 @@ def FMLA : SInst<"vfma", "dddd", "dQd">; def FMLS : SInst<"vfms", "dddd", "fdQfQd">; //////////////////////////////////////////////////////////////////////////////// -// MUL, FMA, FMS definitions with scalar argument +// MUL, MLA, MLS, FMA, FMS definitions with scalar argument def VMUL_N_A64 : IOpInst<"vmul_n", "dds", "Qd", OP_MUL_N>; -def FMLA_N : SOpInst<"vfma_n", "ddds", "fQf", OP_FMLA_N>; -def FMLS_N : SOpInst<"vfms_n", "ddds", "fQf", OP_FMLS_N>; + +def FMLA_N : SOpInst<"vfma_n", "ddds", "fQfQd", OP_FMLA_N>; +def FMLS_N : SOpInst<"vfms_n", "ddds", "fQfQd", OP_FMLS_N>; + +def MLA_N : SOpInst<"vmla_n", "ddds", "Qd", OP_MLA_N>; +def MLS_N : SOpInst<"vmls_n", "ddds", "Qd", OP_MLS_N>; //////////////////////////////////////////////////////////////////////////////// // Logical operations diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c index b3a54be147b..2e549ed44bd 100644 --- a/clang/test/CodeGen/aarch64-neon-fma.c +++ b/clang/test/CodeGen/aarch64-neon-fma.c @@ -26,6 +26,15 @@ float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { // CHECK-FMA: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s } +float64x2_t test_vmlaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { + // CHECK-LABEL: test_vmlaq_n_f64 + return vmlaq_n_f64(a, b, c); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + // CHECK: fadd {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + // CHECK-FMA: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) { // CHECK-LABEL: test_vmlsq_n_f32 return vmlsq_n_f32(a, b, c); @@ -44,6 +53,15 @@ float32x2_t 
test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) { // CHECK-FMA: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s } +float64x2_t test_vmlsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { + // CHECK-LABEL: test_vmlsq_n_f64 + return vmlsq_n_f64(a, b, c); + // CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + // CHECK: fsub {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d + // CHECK-FMA: dup {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] + // CHECK-FMA: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + float32x2_t test_vmla_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { // CHECK-LABEL: test_vmla_lane_f32_0 return vmla_lane_f32(a, b, v, 0); @@ -171,3 +189,14 @@ float32x4_t test_vmlsq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { // CHECK-FMA: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] } +float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { + // CHECK-LABEL: test_vfmaq_n_f64: + return vfmaq_n_f64(a, b, c); + // CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} + +float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { + // CHECK-LABEL: test_vfmsq_n_f64: + return vfmsq_n_f64(a, b, c); + // CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d +} |