diff options
| author | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-04-19 19:44:45 +0000 |
|---|---|---|
| committer | Ahmed Bougacha <ahmed.bougacha@gmail.com> | 2016-04-19 19:44:45 +0000 |
| commit | 1d9de10130ffd5444a5cc41a27467da5e25d3f51 (patch) | |
| tree | 41619e2e3a43b964a7b7fa9fd5254830bd9002eb /clang/test | |
| parent | e885d5e4d3fffc40173a8d0c82a6d30b2400bdec (diff) | |
| download | bcm5719-llvm-1d9de10130ffd5444a5cc41a27467da5e25d3f51.tar.gz bcm5719-llvm-1d9de10130ffd5444a5cc41a27467da5e25d3f51.zip | |
[ARM NEON] Define vfms_f32 on ARM, and all vfms using vfma.
r259537 added vfma/vfms to armv7, but the vfms builtin was only lowered
on the AArch64 side. Instead of supporting the builtin on ARM, get rid of it.
The vfms builtin lowered to:
%nb = fsub float -0.0, %b
%r = @llvm.fma.f32(%a, %nb, %c)
Instead, define the operation in terms of vfma, and swap the
multiplicands. It now lowers to:
%na = fsub float -0.0, %a
%r = @llvm.fma.f32(%na, %b, %c)
This matches the instruction more closely, and lets current LLVM
generate the "natural" operand ordering:
fmls.2s v0, v1, v2
instead of the crooked (but equivalent):
fmls.2s v0, v2, v1
Except for these changes, the assembly is identical.
LLVM accepts both commutations, and the LLVM tests in:
test/CodeGen/AArch64/arm64-fmadd.ll
test/CodeGen/AArch64/fp-dp3.ll
test/CodeGen/AArch64/neon-fma.ll
test/CodeGen/ARM/fusedMAC.ll
already check either the new one only, or both.
Also verified against the test-suite unittests.
llvm-svn: 266807
Diffstat (limited to 'clang/test')
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-2velem.c | 112 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-fma.c | 14 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-intrinsics.c | 56 | ||||
| -rw-r--r-- | clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c | 18 | ||||
| -rw-r--r-- | clang/test/CodeGen/arm_neon_intrinsics.c | 28 |
5 files changed, 128 insertions, 100 deletions
diff --git a/clang/test/CodeGen/aarch64-neon-2velem.c b/clang/test/CodeGen/aarch64-neon-2velem.c index 03c9a858e59..36500f62a5d 100644 --- a/clang/test/CodeGen/aarch64-neon-2velem.c +++ b/clang/test/CodeGen/aarch64-neon-2velem.c @@ -333,10 +333,10 @@ float32x4_t test_vfmaq_laneq_f32(float32x4_t a, float32x4_t b, float32x4_t v) { } // CHECK-LABEL: define <2 x float> @test_vfms_lane_f32(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 1> // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> @@ -348,10 +348,10 @@ float32x2_t test_vfms_lane_f32(float32x2_t a, float32x2_t b, float32x2_t v) { } // CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> // CHECK: 
[[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> @@ -363,10 +363,10 @@ float32x4_t test_vfmsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t v) { } // CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> @@ -378,10 +378,10 @@ float32x2_t test_vfms_laneq_f32(float32x2_t a, float32x2_t b, float32x4_t v) { } // CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: 
[[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> @@ -421,10 +421,10 @@ float64x2_t test_vfmaq_laneq_f64(float64x2_t a, float64x2_t b, float64x2_t v) { } // CHECK-LABEL: define <2 x double> @test_vfmsq_lane_f64(<2 x double> %a, <2 x double> %b, <1 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <2 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> @@ -436,10 +436,10 @@ float64x2_t test_vfmsq_lane_f64(float64x2_t a, float64x2_t b, float64x1_t v) { } // CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> 
@@ -461,33 +461,33 @@ float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { } // CHECK-LABEL: define double @test_vfmsd_lane_f64(double %a, double %b, <1 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> +// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b +// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> // CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) +// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { return vfmsd_lane_f64(a, b, v, 0); } // CHECK-LABEL: define float @test_vfmss_laneq_f32(float %a, float %b, <4 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b +// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 -// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) +// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { return vfmss_laneq_f32(a, b, v, 3); } // CHECK-LABEL: define double @test_vfmsd_laneq_f64(double %a, double %b, <2 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v -// CHECK: 
[[TMP0:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b +// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8> // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) +// CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { return vfmsd_laneq_f64(a, b, v, 1); @@ -1955,10 +1955,10 @@ float32x4_t test_vfmaq_laneq_f32_0(float32x4_t a, float32x4_t b, float32x4_t v) } // CHECK-LABEL: define <2 x float> @test_vfms_lane_f32_0(<2 x float> %a, <2 x float> %b, <2 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> @@ -1970,10 +1970,10 @@ float32x2_t test_vfms_lane_f32_0(float32x2_t a, float32x2_t b, float32x2_t v) { } // CHECK-LABEL: define <4 x float> @test_vfmsq_lane_f32_0(<4 x float> %a, <4 x float> %b, <2 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> // CHECK: [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <4 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> @@ -1985,10 +1985,10 @@ float32x4_t test_vfmsq_lane_f32_0(float32x4_t a, float32x4_t b, float32x2_t v) { } // CHECK-LABEL: define <2 x float> @test_vfms_laneq_f32_0(<2 x float> %a, <2 x float> %b, <4 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> @@ -2000,10 +2000,10 @@ float32x2_t test_vfms_laneq_f32_0(float32x2_t a, float32x2_t b, float32x4_t v) { } // CHECK-LABEL: define <4 x float> @test_vfmsq_laneq_f32_0(<4 x float> %a, <4 x float> %b, <4 x float> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> @@ -2029,10 +2029,10 @@ float64x2_t test_vfmaq_laneq_f64_0(float64x2_t a, float64x2_t b, float64x2_t v) } // CHECK-LABEL: define <2 x double> @test_vfmsq_laneq_f64_0(<2 x double> %a, <2 x double> %b, <2 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> // CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> @@ -3508,35 +3508,35 @@ float32x4_t test_vfmaq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { } // CHECK-LABEL: define <2 x float> @test_vfms_n_f32(<2 x float> %a, <2 x float> %b, float %n) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %n, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a 
to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> [[VECINIT1_I]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP4:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK: [[FMLS1_I_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[FMLS2_I_I:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLS_I_I]], <2 x float> [[TMP4]], <2 x float> [[FMLS1_I_I]]) #2 -// CHECK: ret <2 x float> [[FMLS2_I_I]] +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #2 +// CHECK: ret <2 x float> [[TMP6]] float32x2_t test_vfms_n_f32(float32x2_t a, float32x2_t b, float32_t n) { return vfms_n_f32(a, b, n); } // CHECK-LABEL: define <4 x float> @test_vfmsq_n_f32(<4 x float> %a, <4 x float> %b, float %n) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b // CHECK: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %n, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %n, i32 1 // CHECK: [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %n, i32 2 // CHECK: [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %n, i32 3 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> [[VECINIT3_I]] 
to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP4:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[FMLS1_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[FMLS2_I_I:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLS_I_I]], <4 x float> [[TMP4]], <4 x float> [[FMLS1_I_I]]) #2 -// CHECK: ret <4 x float> [[FMLS2_I_I]] +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> +// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #2 +// CHECK: ret <4 x float> [[TMP6]] float32x4_t test_vfmsq_n_f32(float32x4_t a, float32x4_t b, float32_t n) { return vfmsq_n_f32(a, b, n); } diff --git a/clang/test/CodeGen/aarch64-neon-fma.c b/clang/test/CodeGen/aarch64-neon-fma.c index 0fd9fd2ea37..836321af060 100644 --- a/clang/test/CodeGen/aarch64-neon-fma.c +++ b/clang/test/CodeGen/aarch64-neon-fma.c @@ -227,17 +227,17 @@ float64x2_t test_vfmaq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { } // CHECK-LABEL: define <2 x double> @test_vfmsq_n_f64(<2 x double> %a, <2 x double> %b, double %c) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x double> undef, double %c, i32 0 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x double> [[VECINIT_I]], double %c, i32 1 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %b to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[VECINIT1_I]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = 
bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK: [[FMLS1_I_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[FMLS2_I_I:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLS_I_I]], <2 x double> [[TMP4]], <2 x double> [[FMLS1_I_I]]) #2 -// CHECK: ret <2 x double> [[FMLS2_I_I]] +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> +// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #2 +// CHECK: ret <2 x double> [[TMP6]] float64x2_t test_vfmsq_n_f64(float64x2_t a, float64x2_t b, float64_t c) { return vfmsq_n_f64(a, b, c); } diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c index ae0391a8c6f..b087ce91e56 100644 --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -674,43 +674,43 @@ float64x2_t test_vfmaq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { return vfmaq_f64(v1, v2, v3); } // CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %v1, <2 x float> %v2, <2 x float> %v3) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %v2 // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %v1 to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x float> %v2 to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x float> %v3 to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> -// CHECK: [[TMP4:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I:%.*]] = bitcast <8 x i8> 
[[TMP2]] to <2 x float> -// CHECK: [[FMLS1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[FMLS2_I:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[FMLS_I]], <2 x float> [[TMP4]], <2 x float> [[FMLS1_I]]) #4 -// CHECK: ret <2 x float> [[FMLS2_I]] +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 +// CHECK: ret <2 x float> [[TMP6]] float32x2_t test_vfms_f32(float32x2_t v1, float32x2_t v2, float32x2_t v3) { return vfms_f32(v1, v2, v3); } // CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %v1, <4 x float> %v2, <4 x float> %v3) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %v2 // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x float> %v2 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <4 x float> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> -// CHECK: [[TMP4:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[FMLS2_I:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[FMLS_I]], <4 x float> [[TMP4]], <4 x float> [[FMLS1_I]]) #4 -// CHECK: ret <4 x float> [[FMLS2_I]] +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> +// CHECK: [[TMP6:%.*]] = call <4 x 
float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 +// CHECK: ret <4 x float> [[TMP6]] float32x4_t test_vfmsq_f32(float32x4_t v1, float32x4_t v2, float32x4_t v3) { return vfmsq_f32(v1, v2, v3); } // CHECK-LABEL: define <2 x double> @test_vfmsq_f64(<2 x double> %v1, <2 x double> %v2, <2 x double> %v3) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v2 // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v1 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <2 x double> %v2 to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x double> [[SUB_I]] to <16 x i8> // CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v3 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> -// CHECK: [[TMP4:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK: [[FMLS1_I:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[FMLS2_I:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[FMLS_I]], <2 x double> [[TMP4]], <2 x double> [[FMLS1_I]]) #4 -// CHECK: ret <2 x double> [[FMLS2_I]] +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x double> +// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> +// CHECK: [[TMP6:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP4]], <2 x double> [[TMP5]], <2 x double> [[TMP3]]) #4 +// CHECK: ret <2 x double> [[TMP6]] float64x2_t test_vfmsq_f64(float64x2_t v1, float64x2_t v2, float64x2_t v3) { return vfmsq_f64(v1, v2, v3); } @@ -22133,15 +22133,15 @@ float64x1_t test_vfma_f64(float64x1_t a, float64x1_t b, float64x1_t c) { } // CHECK-LABEL: define <1 x double> @test_vfms_f64(<1 x double> %a, <1 x double> %b, <1 x double> %c) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <1 x 
double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB_I]] to <8 x i8> // CHECK: [[TMP2:%.*]] = bitcast <1 x double> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> -// CHECK: [[TMP4:%.*]] = fsub <1 x double> <double -0.000000e+00>, [[TMP3]] -// CHECK: [[FMLS_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> -// CHECK: [[FMLS1_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[FMLS2_I:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[FMLS_I]], <1 x double> [[TMP4]], <1 x double> [[FMLS1_I]]) #4 -// CHECK: ret <1 x double> [[FMLS2_I]] +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> +// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> +// CHECK: [[TMP6:%.*]] = call <1 x double> @llvm.fma.v1f64(<1 x double> [[TMP4]], <1 x double> [[TMP5]], <1 x double> [[TMP3]]) #4 +// CHECK: ret <1 x double> [[TMP6]] float64x1_t test_vfms_f64(float64x1_t a, float64x1_t b, float64x1_t c) { return vfms_f64(a, b, c); } diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c index 50ed582614c..39aab2540e8 100644 --- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -178,11 +178,11 @@ float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { } // CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %c -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> [[SUB]] to <8 x i8> +// CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b +// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8> // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> // CHECK: [[EXTRACT:%.*]] = 
extractelement <2 x float> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) +// CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { return vfmss_lane_f32(a, b, c, 1); @@ -203,10 +203,10 @@ float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) { } // CHECK-LABEL: define <1 x double> @test_vfms_lane_f64(<1 x double> %a, <1 x double> %b, <1 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %v to <8 x i8> // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> // CHECK: [[LANE:%.*]] = shufflevector <1 x double> [[TMP3]], <1 x double> [[TMP3]], <1 x i32> zeroinitializer // CHECK: [[FMLA:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x double> @@ -233,10 +233,10 @@ float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) { } // CHECK-LABEL: define <1 x double> @test_vfms_laneq_f64(<1 x double> %a, <1 x double> %b, <2 x double> %v) #0 { -// CHECK: [[SUB:%.*]] = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %v +// CHECK: [[SUB:%.*]] = fsub <1 x double> <double -0.000000e+00>, %b // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> [[SUB]] to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <1 x double> [[SUB]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %v to <16 x i8> // CHECK: 
[[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to double // CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to double // CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c index 037d62a9c93..4cc7eedffd5 100644 --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -3429,6 +3429,34 @@ float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { return vfmaq_f32(a, b, c); } +// CHECK-LABEL: define <2 x float> @test_vfms_f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %b +// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8> +// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float> +// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> +// CHECK: [[TMP6:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x float> [[TMP3]]) #4 +// CHECK: ret <2 x float> [[TMP6]] +float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) { + return vfms_f32(a, b, c); +} + +// CHECK-LABEL: define <4 x float> @test_vfmsq_f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { +// CHECK: [[SUB_I:%.*]] = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b +// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> +// CHECK: [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8> +// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float> +// CHECK: [[TMP5:%.*]] = bitcast <16 x 
i8> [[TMP2]] to <4 x float> +// CHECK: [[TMP6:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x float> [[TMP3]]) #4 +// CHECK: ret <4 x float> [[TMP6]] +float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) { + return vfmsq_f32(a, b, c); +} + // CHECK-LABEL: define <8 x i8> @test_vget_high_s8(<16 x i8> %a) #0 { // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> |

