author | Oliver Stannard <oliver.stannard@arm.com> | 2014-08-21 12:50:31 +0000
---|---|---
committer | Oliver Stannard <oliver.stannard@arm.com> | 2014-08-21 12:50:31 +0000
commit | 51b1d460cb77a726546099f857de0492433bc321 (patch) |
tree | e05a0172d880987a30d1737cc554f6e411fa5b38 /llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll |
parent | 18b2a258c33368fad430750ad18b2b70f23dcf35 (diff) |
[ARM] Enable DP copy, load and store instructions for FPv4-SP
The FPv4-SP floating-point unit is generally referred to as
single-precision only, but it does have double-precision registers, along
with load, store and GPR<->DPR move instructions that operate on them.
This patch enables the use of these registers, the main advantage of
which is that we now comply with the AAPCS-VFP calling convention.
This partially reverts r209650, which added some AAPCS-VFP support,
but did not handle return values or alignment of double arguments in
registers.
This patch also adds tests for Thumb2 code generation for
floating-point instructions and intrinsics, which previously only
existed for ARM.
llvm-svn: 216172
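To make the calling-convention point in the commit message concrete, here is a minimal sketch in the style of the new test file. It is not part of this commit: the function names, RUN line, and CHECK patterns are illustrative assumptions. Under AAPCS-VFP a double argument and its return value live in d0 even on an FPv4-SP core such as Cortex-M4, so the functions below only need the DP copy and store instructions this patch enables.

```llvm
; Hypothetical companion test (not in this commit), written in the same
; style as float-intrinsics-float.ll.
; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s

; Per AAPCS-VFP, %a arrives in d0 and is also returned in d0, so the body
; should reduce to a plain return with no soft-float helper calls.
define double @return_double(double %a) {
; CHECK-LABEL: return_double:
; CHECK-NOT: __aeabi_
; CHECK: bx lr
  ret double %a
}

; Storing the double argument should use a single 64-bit vstr from d0
; (one of the DP store instructions enabled by this patch) rather than
; splitting the value across two GPR stores.
define void @store_double(double %a, double* %p) {
; CHECK-LABEL: store_double:
; CHECK: vstr d{{[0-9]+}}, [r0]
  store double %a, double* %p
  ret void
}
```

As the commit message notes, r209650 did not handle return values in D registers, so before this change the first function would not have collapsed to a bare return on this target.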
Diffstat (limited to 'llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll')
-rw-r--r-- | llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll | 210 |
1 file changed, 210 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
new file mode 100644
index 00000000000..b29ab35c8f6
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -0,0 +1,210 @@
+; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=SOFT -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP
+
+declare float @llvm.sqrt.f32(float %Val)
+define float @sqrt_f(float %a) {
+; CHECK-LABEL: sqrt_f:
+; SOFT: bl sqrtf
+; HARD: vsqrt.f32 s0, s0
+  %1 = call float @llvm.sqrt.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.powi.f32(float %Val, i32 %power)
+define float @powi_f(float %a, i32 %b) {
+; CHECK-LABEL: powi_f:
+; SOFT: bl __powisf2
+; HARD: b __powisf2
+  %1 = call float @llvm.powi.f32(float %a, i32 %b)
+  ret float %1
+}
+
+declare float @llvm.sin.f32(float %Val)
+define float @sin_f(float %a) {
+; CHECK-LABEL: sin_f:
+; SOFT: bl sinf
+; HARD: b sinf
+  %1 = call float @llvm.sin.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.cos.f32(float %Val)
+define float @cos_f(float %a) {
+; CHECK-LABEL: cos_f:
+; SOFT: bl cosf
+; HARD: b cosf
+  %1 = call float @llvm.cos.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.pow.f32(float %Val, float %power)
+define float @pow_f(float %a, float %b) {
+; CHECK-LABEL: pow_f:
+; SOFT: bl powf
+; HARD: b powf
+  %1 = call float @llvm.pow.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float @llvm.exp.f32(float %Val)
+define float @exp_f(float %a) {
+; CHECK-LABEL: exp_f:
+; SOFT: bl expf
+; HARD: b expf
+  %1 = call float @llvm.exp.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.exp2.f32(float %Val)
+define float @exp2_f(float %a) {
+; CHECK-LABEL: exp2_f:
+; SOFT: bl exp2f
+; HARD: b exp2f
+  %1 = call float @llvm.exp2.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.log.f32(float %Val)
+define float @log_f(float %a) {
+; CHECK-LABEL: log_f:
+; SOFT: bl logf
+; HARD: b logf
+  %1 = call float @llvm.log.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.log10.f32(float %Val)
+define float @log10_f(float %a) {
+; CHECK-LABEL: log10_f:
+; SOFT: bl log10f
+; HARD: b log10f
+  %1 = call float @llvm.log10.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.log2.f32(float %Val)
+define float @log2_f(float %a) {
+; CHECK-LABEL: log2_f:
+; SOFT: bl log2f
+; HARD: b log2f
+  %1 = call float @llvm.log2.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.fma.f32(float %a, float %b, float %c)
+define float @fma_f(float %a, float %b, float %c) {
+; CHECK-LABEL: fma_f:
+; SOFT: bl fmaf
+; HARD: vfma.f32
+  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+declare float @llvm.fabs.f32(float %Val)
+define float @abs_f(float %a) {
+; CHECK-LABEL: abs_f:
+; SOFT: bic r0, r0, #-2147483648
+; HARD: vabs.f32
+  %1 = call float @llvm.fabs.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.copysign.f32(float %Mag, float %Sgn)
+define float @copysign_f(float %a, float %b) {
+; CHECK-LABEL: copysign_f:
+; NONE: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
+; NONE: bfi r{{[0-9]+}}, [[REG]], #31, #1
+; SP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
+; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1
+; DP: vmov.i32 [[REG:d[0-9]+]], #0x80000000
+; DP: vbsl [[REG]], d
+  %1 = call float @llvm.copysign.f32(float %a, float %b)
+  ret float %1
+}
+
+declare float @llvm.floor.f32(float %Val)
+define float @floor_f(float %a) {
+; CHECK-LABEL: floor_f:
+; SOFT: bl floorf
+; HARD: b floorf
+  %1 = call float @llvm.floor.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.ceil.f32(float %Val)
+define float @ceil_f(float %a) {
+; CHECK-LABEL: ceil_f:
+; SOFT: bl ceilf
+; HARD: b ceilf
+  %1 = call float @llvm.ceil.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.trunc.f32(float %Val)
+define float @trunc_f(float %a) {
+; CHECK-LABEL: trunc_f:
+; SOFT: bl truncf
+; HARD: b truncf
+  %1 = call float @llvm.trunc.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.rint.f32(float %Val)
+define float @rint_f(float %a) {
+; CHECK-LABEL: rint_f:
+; SOFT: bl rintf
+; HARD: b rintf
+  %1 = call float @llvm.rint.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.nearbyint.f32(float %Val)
+define float @nearbyint_f(float %a) {
+; CHECK-LABEL: nearbyint_f:
+; SOFT: bl nearbyintf
+; HARD: b nearbyintf
+  %1 = call float @llvm.nearbyint.f32(float %a)
+  ret float %1
+}
+
+declare float @llvm.round.f32(float %Val)
+define float @round_f(float %a) {
+; CHECK-LABEL: round_f:
+; SOFT: bl roundf
+; HARD: b roundf
+  %1 = call float @llvm.round.f32(float %a)
+  ret float %1
+}
+
+; FIXME: why does cortex-m4 use vmla, while cortex-a7 uses vmul+vadd?
+; (these should be equivalent, even the rounding is the same)
+declare float @llvm.fmuladd.f32(float %a, float %b, float %c)
+define float @fmuladd_f(float %a, float %b, float %c) {
+; CHECK-LABEL: fmuladd_f:
+; SOFT: bl __aeabi_fmul
+; SOFT: bl __aeabi_fadd
+; SP: vmla.f32
+; DP: vmul.f32
+; DP: vadd.f32
+  %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
+  ret float %1
+}
+
+declare i16 @llvm.convert.to.fp16.f32(float %a)
+define i16 @f_to_h(float %a) {
+; CHECK-LABEL: f_to_h:
+; SOFT: bl __gnu_f2h_ieee
+; HARD: vcvtb.f16.f32
+  %1 = call i16 @llvm.convert.to.fp16.f32(float %a)
+  ret i16 %1
+}
+
+declare float @llvm.convert.from.fp16.f32(i16 %a)
+define float @h_to_f(i16 %a) {
+; CHECK-LABEL: h_to_f:
+; SOFT: bl __gnu_h2f_ieee
+; HARD: vcvtb.f32.f16
+  %1 = call float @llvm.convert.from.fp16.f32(i16 %a)
+  ret float %1
+}