diff options
Diffstat (limited to 'llvm/test')
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll index 985b5bf483a..7b243309903 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-2velem.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon -fp-contract=fast -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s +; The instruction latencies of Exynos-M1 trigger the transform we see under the Exynos check. declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>) @@ -382,6 +384,10 @@ define <2 x float> @test_vfma_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa ; CHECK-LABEL: test_vfma_lane_f32: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfma_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -394,6 +400,10 @@ define <4 x float> @test_vfmaq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo ; CHECK-LABEL: test_vfmaq_lane_f32: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -406,6 +416,10 @@ define <2 x float> @test_vfma_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo ; CHECK-LABEL: test_vfma_laneq_f32: ; 
CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfma_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -416,6 +430,10 @@ define <4 x float> @test_vfmaq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl ; CHECK-LABEL: test_vfmaq_laneq_f32: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -426,6 +444,10 @@ define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x floa ; CHECK-LABEL: test_vfms_lane_f32: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfms_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> <i32 1, i32 1> @@ -437,6 +459,10 @@ define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x flo ; CHECK-LABEL: test_vfmsq_lane_f32: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %sub = fsub <2 x float> <float -0.000000e+00, float 
-0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> @@ -448,6 +474,10 @@ define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x flo ; CHECK-LABEL: test_vfms_laneq_f32: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfms_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> <i32 3, i32 3> @@ -459,6 +489,10 @@ define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x fl ; CHECK-LABEL: test_vfmsq_laneq_f32: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> @@ -470,6 +504,10 @@ define <2 x double> @test_vfmaq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x ; CHECK-LABEL: test_vfmaq_lane_f64: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_lane_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %lane = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -482,6 +520,10 @@ define <2 x double> @test_vfmaq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x ; CHECK-LABEL: 
test_vfmaq_laneq_f64: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_laneq_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -492,6 +534,10 @@ define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x ; CHECK-LABEL: test_vfmsq_lane_f64: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_lane_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %sub = fsub <1 x double> <double -0.000000e+00>, %v %lane = shufflevector <1 x double> %sub, <1 x double> undef, <2 x i32> zeroinitializer @@ -503,6 +549,10 @@ define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x ; CHECK-LABEL: test_vfmsq_laneq_f64: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_laneq_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> <i32 1, i32 1> @@ -514,6 +564,9 @@ define float @test_vfmas_laneq_f32(float %a, float %b, <4 x float> %v) { ; CHECK-LABEL: test_vfmas_laneq_f32 ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmas_laneq_f32 +; EXYNOS: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3] +; EXYNOS-NEXT: ret entry: %extract = extractelement <4 x float> %v, i32 3 %0 = tail call float @llvm.fma.f32(float %b, float %extract, float %a) @@ -539,6 
+592,9 @@ define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmss_lane_f32 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmss_lane_f32 +; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; EXYNOS-NEXT: ret entry: %extract.rhs = extractelement <2 x float> %v, i32 1 %extract = fsub float -0.000000e+00, %extract.rhs @@ -561,6 +617,9 @@ define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) { ; CHECK-LABEL: test_vfmsd_laneq_f64 ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsd_laneq_f64 +; EXYNOS: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; EXYNOS-NEXT: ret entry: %extract.rhs = extractelement <2 x double> %v, i32 1 %extract = fsub double -0.000000e+00, %extract.rhs @@ -583,6 +642,9 @@ define float @test_vfmss_lane_f32_0(float %a, float %b, <2 x float> %v) { ; CHECK-LABEL: test_vfmss_lane_f32_0 ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmss_lane_f32_0 +; EXYNOS: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1] +; EXYNOS-NEXT: ret entry: %tmp0 = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %tmp1 = extractelement <2 x float> %tmp0, i32 1 @@ -1408,6 +1470,10 @@ define <2 x float> @test_vmul_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmul_lane_f32: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> %mul = fmul <2 x float> %shuffle, %a @@ -1418,6 +1484,9 @@ define <1 x double> @test_vmul_lane_f64(<1 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmul_lane_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} ; 
CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_lane_f64: +; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}} +; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1431,6 +1500,10 @@ define <4 x float> @test_vmulq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulq_lane_f32: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %mul = fmul <4 x float> %shuffle, %a @@ -1441,6 +1514,10 @@ define <2 x double> @test_vmulq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmulq_lane_f64: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_lane_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -1451,6 +1528,10 @@ define <2 x float> @test_vmul_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmul_laneq_f32: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %mul = fmul <2 x float> %shuffle, %a @@ -1461,6 +1542,9 @@ define <1 x double> @test_vmul_laneq_f64(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: 
test_vmul_laneq_f64: +; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1] +; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -1474,6 +1558,10 @@ define <4 x float> @test_vmulq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulq_laneq_f32: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %mul = fmul <4 x float> %shuffle, %a @@ -1484,6 +1572,10 @@ define <2 x double> @test_vmulq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulq_laneq_f64: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_laneq_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %mul = fmul <2 x double> %shuffle, %a @@ -1494,6 +1586,10 @@ define <2 x float> @test_vmulx_lane_f32(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulx_lane_f32: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulx_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[1] +; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> <i32 1, i32 1> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1504,6 +1600,10 @@ define <4 x float> @test_vmulxq_lane_f32(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulxq_lane_f32: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] ; 
CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_lane_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1514,6 +1614,10 @@ define <2 x double> @test_vmulxq_lane_f64(<2 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmulxq_lane_f64: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_lane_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1524,6 +1628,10 @@ define <2 x float> @test_vmulx_laneq_f32(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulx_laneq_f32: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulx_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[3] +; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> <i32 3, i32 3> %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -1534,6 +1642,10 @@ define <4 x float> @test_vmulxq_laneq_f32(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f32: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_laneq_f32: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> 
undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -1544,6 +1656,10 @@ define <2 x double> @test_vmulxq_laneq_f64(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f64: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[1] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_laneq_f64: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[1] +; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 1> %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -1890,6 +2006,10 @@ define <2 x float> @test_vfma_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl ; CHECK-LABEL: test_vfma_lane_f32_0: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfma_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1900,6 +2020,10 @@ define <4 x float> @test_vfmaq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f ; CHECK-LABEL: test_vfmaq_lane_f32_0: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1910,6 +2034,10 @@ define <2 x float> @test_vfma_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x f ; 
CHECK-LABEL: test_vfma_laneq_f32_0: ; CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfma_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %0 = tail call <2 x float> @llvm.fma.v2f32(<2 x float> %lane, <2 x float> %b, <2 x float> %a) @@ -1920,6 +2048,10 @@ define <4 x float> @test_vfmaq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-LABEL: test_vfmaq_laneq_f32_0: ; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %lane = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane, <4 x float> %b, <4 x float> %a) @@ -1930,6 +2062,10 @@ define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x fl ; CHECK-LABEL: test_vfms_lane_f32_0: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfms_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %sub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <2 x i32> zeroinitializer @@ -1941,6 +2077,10 @@ define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x f ; CHECK-LABEL: test_vfmsq_lane_f32_0: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %sub = 
fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <2 x float> %sub, <2 x float> undef, <4 x i32> zeroinitializer @@ -1952,6 +2092,10 @@ define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x f ; CHECK-LABEL: test_vfms_laneq_f32_0: ; CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfms_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <2 x i32> zeroinitializer @@ -1963,6 +2107,10 @@ define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x ; CHECK-LABEL: test_vfmsq_laneq_f32_0: ; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v %lane = shufflevector <4 x float> %sub, <4 x float> undef, <4 x i32> zeroinitializer @@ -1974,6 +2122,10 @@ define <2 x double> @test_vfmaq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 ; CHECK-LABEL: test_vfmaq_laneq_f64_0: ; CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmaq_laneq_f64_0: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %lane = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %0 = tail call <2 x double> @llvm.fma.v2f64(<2 x double> %lane, <2 x double> %b, <2 x double> %a) @@ -1984,6 +2136,10 @@ define <2 x double> @test_vfmsq_laneq_f64_0(<2 x 
double> %a, <2 x double> %b, <2 ; CHECK-LABEL: test_vfmsq_laneq_f64_0: ; CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vfmsq_laneq_f64_0: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v %lane = shufflevector <2 x double> %sub, <2 x double> undef, <2 x i32> zeroinitializer @@ -2787,6 +2943,10 @@ define <2 x float> @test_vmul_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmul_lane_f32_0: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %mul = fmul <2 x float> %shuffle, %a @@ -2797,6 +2957,10 @@ define <4 x float> @test_vmulq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulq_lane_f32_0: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2807,6 +2971,10 @@ define <2 x float> @test_vmul_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmul_laneq_f32_0: ; CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %mul = 
fmul <2 x float> %shuffle, %a @@ -2817,6 +2985,9 @@ define <1 x double> @test_vmul_laneq_f64_0(<1 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmul_laneq_f64_0: ; CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmul_laneq_f64_0: +; EXYNOS: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0] +; EXYNOS-NEXT: ret entry: %0 = bitcast <1 x double> %a to <8 x i8> %1 = bitcast <8 x i8> %0 to double @@ -2830,6 +3001,10 @@ define <4 x float> @test_vmulq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulq_laneq_f32_0: ; CHECK: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: fmul {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %mul = fmul <4 x float> %shuffle, %a @@ -2840,6 +3015,10 @@ define <2 x double> @test_vmulq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulq_laneq_f64_0: ; CHECK: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulq_laneq_f64_0: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: fmul {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %mul = fmul <2 x double> %shuffle, %a @@ -2850,6 +3029,10 @@ define <2 x float> @test_vmulx_lane_f32_0(<2 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulx_lane_f32_0: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulx_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> 
@llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2860,6 +3043,10 @@ define <4 x float> @test_vmulxq_lane_f32_0(<4 x float> %a, <2 x float> %v) { ; CHECK-LABEL: test_vmulxq_lane_f32_0: ; CHECK: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_lane_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x float> %v, <2 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2870,6 +3057,10 @@ define <2 x double> @test_vmulxq_lane_f64_0(<2 x double> %a, <1 x double> %v) { ; CHECK-LABEL: test_vmulxq_lane_f64_0: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_lane_f64_0: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <1 x double> %v, <1 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) @@ -2880,6 +3071,10 @@ define <2 x float> @test_vmulx_laneq_f32_0(<2 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulx_laneq_f32_0: ; CHECK: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulx_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].2s, {{v[0-9]+}}.s[0] +; EXYNOS: mulx {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, [[x]].2s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %a, <2 x float> %shuffle) @@ -2890,6 +3085,10 @@ define <4 x float> @test_vmulxq_laneq_f32_0(<4 x float> %a, <4 x float> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f32_0: ; CHECK: mulx {{v[0-9]+}}.4s, 
{{v[0-9]+}}.4s, {{v[0-9]+}}.s[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_laneq_f32_0: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[0] +; EXYNOS: mulx {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer %vmulx2.i = tail call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %a, <4 x float> %shuffle) @@ -2900,9 +3099,51 @@ define <2 x double> @test_vmulxq_laneq_f64_0(<2 x double> %a, <2 x double> %v) { ; CHECK-LABEL: test_vmulxq_laneq_f64_0: ; CHECK: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.d[0] ; CHECK-NEXT: ret +; EXYNOS-LABEL: test_vmulxq_laneq_f64_0: +; EXYNOS: dup [[x:v[0-9]+]].2d, {{v[0-9]+}}.d[0] +; EXYNOS: mulx {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, [[x]].2d +; EXYNOS-NEXT: ret entry: %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer %vmulx2.i = tail call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %a, <2 x double> %shuffle) ret <2 x double> %vmulx2.i } +define <4 x float> @optimize_dup(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %v) { +; CHECK-LABEL: optimize_dup: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK-NEXT: ret +; EXYNOS-LABEL: optimize_dup: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS-NEXT: ret +entry: + %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) + %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %1 = fmul <4 x float> %lane2, %c + %s = fsub <4 x float> %0, %1 + ret <4 x float> %s +} + +define <4 x float> @no_optimize_dup(<4 x float> %a, <4 x float> %b, <4 
x float> %c, <4 x float> %v) { +; CHECK-LABEL: no_optimize_dup: +; CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3] +; CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1] +; CHECK-NEXT: ret +; EXYNOS-LABEL: no_optimize_dup: +; EXYNOS: dup [[x:v[0-9]+]].4s, {{v[0-9]+}}.s[3] +; EXYNOS: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[x]].4s +; EXYNOS: dup [[y:v[0-9]+]].4s, {{v[0-9]+}}.s[1] +; EXYNOS: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, [[y]].4s +; EXYNOS-NEXT: ret +entry: + %lane1 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> + %0 = tail call <4 x float> @llvm.fma.v4f32(<4 x float> %lane1, <4 x float> %b, <4 x float> %a) + %lane2 = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> + %1 = fmul <4 x float> %lane2, %c + %s = fsub <4 x float> %0, %1 + ret <4 x float> %s +} |