diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-08-14 22:06:05 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2015-08-14 22:06:05 +0000 |
| commit | cd357872178619781f17ced87364c056d7a84c98 (patch) | |
| tree | 6620c2d059a1b3dfd268a285f72c9f956f58e43f /llvm | |
| parent | 24ac55d8841d1c1782d989af6e87c4502c6bda33 (diff) | |
| download | bcm5719-llvm-cd357872178619781f17ced87364c056d7a84c98.tar.gz bcm5719-llvm-cd357872178619781f17ced87364c056d7a84c98.zip | |
[AArch64] Fix FMLS scalar-indexed-from-2s-after-neg patterns.
We canonicalize V64 vectors to V128 through insert_subvector: the other
FMLA/FMLS/FMUL/FMULX patterns match that already, but this one doesn't,
so we'd fail to match fmls and generate fneg+fmla instead.
The vector equivalents are already tested and functional.
llvm-svn: 245107
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll | 55 |
2 files changed, 58 insertions, 1 deletion
```diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 349431eec9b..77293726f50 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4438,7 +4438,9 @@ multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
             (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                 V128:$Rm, VectorIndexS:$idx)>;
   def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
-                         (vector_extract (v2f32 (fneg V64:$Rm)),
+                         (vector_extract (v4f32 (insert_subvector undef,
+                                                    (v2f32 (fneg V64:$Rm)),
+                                                    (i32 0))),
                                          VectorIndexS:$idx))),
             (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                 (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
index 869966caa3a..985b5bf483a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll
@@ -535,6 +535,17 @@ entry:
 
 declare double @llvm.fma.f64(double, double, double)
 
+define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %extract.rhs = extractelement <2 x float> %v, i32 1
+  %extract = fsub float -0.000000e+00, %extract.rhs
+  %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a)
+  ret float %0
+}
+
 define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) {
 ; CHECK-LABEL: test_vfmss_laneq_f32
 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
@@ -557,6 +568,50 @@ entry:
   ret double %0
 }
 
+define double @test_vfmsd_lane_f64_0(double %a, double %b, <1 x double> %v) {
+; CHCK-LABEL: test_vfmsd_lane_f64_0
+; CHCK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHCK-NEXT: ret
+entry:
+  %tmp0 = fsub <1 x double> <double -0.000000e+00>, %v
+  %tmp1 = extractelement <1 x double> %tmp0, i32 0
+  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %0
+}
+
+define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) {
+; CHECK-LABEL: test_vfmss_lane_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v
+  %tmp1 = extractelement <2 x float> %tmp0, i32 1
+  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %0
+}
+
+define float @test_vfmss_laneq_f32_0(float %a, float %b, <4 x float> %v) {
+; CHECK-LABEL: test_vfmss_laneq_f32_0
+; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <4 x float><float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v
+  %tmp1 = extractelement <4 x float> %tmp0, i32 3
+  %0 = tail call float @llvm.fma.f32(float %b, float %tmp1, float %a)
+  ret float %0
+}
+
+define double @test_vfmsd_laneq_f64_0(double %a, double %b, <2 x double> %v) {
+; CHECK-LABEL: test_vfmsd_laneq_f64_0
+; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+; CHECK-NEXT: ret
+entry:
+  %tmp0 = fsub <2 x double><double -0.000000e+00, double -0.000000e+00>, %v
+  %tmp1 = extractelement <2 x double> %tmp0, i32 1
+  %0 = tail call double @llvm.fma.f64(double %b, double %tmp1, double %a)
+  ret double %0
+}
+
 define <4 x i32> @test_vmlal_lane_s16(<4 x i32> %a, <4 x i16> %b, <4 x i16> %v) {
 ; CHECK-LABEL: test_vmlal_lane_s16:
 ; CHECK: mlal {{v[0-9]+}}.4s, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
```

