summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPablo Barrio <pablo.barrio@arm.com>2018-02-15 14:44:22 +0000
committerPablo Barrio <pablo.barrio@arm.com>2018-02-15 14:44:22 +0000
commite28cb8399a057bbe97002d7da78af18a6d56e4ff (patch)
treea568971e61fb9d574badba82616570de65037ecb
parent17bb6f0755876354428e0b88926905ad6e631c1a (diff)
downloadbcm5719-llvm-e28cb8399a057bbe97002d7da78af18a6d56e4ff.tar.gz
bcm5719-llvm-e28cb8399a057bbe97002d7da78af18a6d56e4ff.zip
[ARM] Allow 64- and 128-bit types with 't' inline asm constraint
Summary: In LLVM, 't' selects a floating-point/SIMD register and only supports 32-bit values. This is appropriately documented in the LLVM Language Reference Manual. However, this behaviour diverges from that of GCC, where 't' selects the s0-s31 registers and its qX and dX variants depending on additional operand modifiers (q/P). For example, the following C code: #include <arm_neon.h> float32x4_t a, b, x; asm("vadd.f32 %0, %1, %2" : "=t" (x) : "t" (a), "t" (b)) results in the following assembly if compiled with GCC: vadd.f32 s0, s0, s1 whereas LLVM will show "error: couldn't allocate output register for constraint 't'", since a, b, x are 128-bit variables, not 32-bit. This patch extends the use of 't' to mean that of GCC, thus allowing selection of the lower Q vector regs and their D/S variants. For example, the earlier code will now compile as: vadd.f32 q0, q0, q1 This behaviour still differs from that of GCC but I think it is actually more correct, since LLVM picks up the right register type based on the datatype of x, while GCC would need an extra operand modifier to achieve the same result, as follows: asm("vadd.f32 %q0, %q1, %q2" : "=t" (x) : "t" (a), "t" (b)) Since this is only an extension of functionality, existing code should not be affected by this change. Note that operand modifiers q/P are already supported by LLVM, so this patch should suffice to support inline assembly with constraint 't' originally built for GCC. Reviewers: grosbach, rengolin Reviewed By: rengolin Subscribers: rogfer01, efriedma, olista01, aemerson, javed.absar, eraman, kristof.beyls, llvm-commits Differential Revision: https://reviews.llvm.org/D42962 llvm-svn: 325244
-rw-r--r--llvm/docs/LangRef.rst8
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp6
-rw-r--r--llvm/test/CodeGen/ARM/inlineasm-error-t-toofewregs.ll9
-rw-r--r--llvm/test/CodeGen/ARM/inlineasm.ll32
4 files changed, 51 insertions, 4 deletions
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 965cdd4187f..034a8669023 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -3654,8 +3654,8 @@ ARM and ARM's Thumb2 mode:
``d0-d31``, or ``q0-q15``.
- ``x``: A 32, 64, or 128-bit floating-point/SIMD register: ``s0-s15``,
``d0-d7``, or ``q0-q3``.
-- ``t``: A floating-point/SIMD register, only supports 32-bit values:
- ``s0-s31``.
+- ``t``: A low floating-point/SIMD register: ``s0-s31``, ``d0-d16``, or
+ ``q0-q8``.
ARM's Thumb1 mode:
@@ -3674,8 +3674,8 @@ ARM's Thumb1 mode:
``d0-d31``, or ``q0-q15``.
- ``x``: A 32, 64, or 128-bit floating-point/SIMD register: ``s0-s15``,
``d0-d7``, or ``q0-q3``.
-- ``t``: A floating-point/SIMD register, only supports 32-bit values:
- ``s0-s31``.
+- ``t``: A low floating-point/SIMD register: ``s0-s31``, ``d0-d16``, or
+ ``q0-q8``.
Hexagon:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 72fede76e64..c52b619a050 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13467,8 +13467,14 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
return RCPair(0U, &ARM::QPR_8RegClass);
break;
case 't':
+ if (VT == MVT::Other)
+ break;
if (VT == MVT::f32 || VT == MVT::i32)
return RCPair(0U, &ARM::SPRRegClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, &ARM::DPR_VFP2RegClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, &ARM::QPR_VFP2RegClass);
break;
}
}
diff --git a/llvm/test/CodeGen/ARM/inlineasm-error-t-toofewregs.ll b/llvm/test/CodeGen/ARM/inlineasm-error-t-toofewregs.ll
new file mode 100644
index 00000000000..ea1b2273911
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/inlineasm-error-t-toofewregs.ll
@@ -0,0 +1,9 @@
+; RUN: not llc -mtriple=armv8-eabi -mattr=+neon %s -o /dev/null 2<&1 | FileCheck %s
+
+; CHECK: inline assembly requires more registers than available
+define <4 x float> @t-constraint-float-vectors-too-few-regs(<4 x float> %a, <4 x float> %b) {
+entry:
+ %0 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> } asm "vadd.F32 $0, $9, $10\0A\09vadd.F32 $1, $9, $10\0A\09vadd.F32 $2, $9, $10\0A\09vadd.F32 $3, $9, $10\0A\09vadd.F32 $4, $9, $10\0A\09vadd.F32 $5, $9, $10\0A\09vadd.F32 $6, $9, $10\0A\09vadd.F32 $7, $9, $10\0A\09vadd.F32 $8, $9, $10", "=t,=t,=t,=t,=t,=t,=t,=t,=t,=t,t,t"(<4 x float> %a, <4 x float> %b)
+ %asmresult = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float> } %0, 0
+ ret <4 x float> %asmresult
+}
diff --git a/llvm/test/CodeGen/ARM/inlineasm.ll b/llvm/test/CodeGen/ARM/inlineasm.ll
index fdb8938884c..1ed7f6951bb 100644
--- a/llvm/test/CodeGen/ARM/inlineasm.ll
+++ b/llvm/test/CodeGen/ARM/inlineasm.ll
@@ -16,3 +16,35 @@ define float @t-constraint-int(i32 %i) {
%ret = call float asm "vcvt.f32.s32 $0, $1\0A", "=t,t"(i32 %i)
ret float %ret
}
+
+define <2 x i32> @t-constraint-int-vector-64bit(<2 x float> %x) {
+entry:
+ ; CHECK-LABEL: t-constraint-int-vector-64bit
+ ; CHECK: vcvt.s32.f32 {{d[0-9]+}}, {{d[0-9]+}}
+ %0 = tail call <2 x i32> asm "vcvt.s32.f32 $0, $1", "=t,t"(<2 x float> %x)
+ ret <2 x i32> %0
+}
+
+define <4 x i32> @t-constraint-int-vector-128bit(<4 x float> %x) {
+entry:
+ ; CHECK-LABEL: t-constraint-int-vector-128bit
+ ; CHECK: vcvt.s32.f32 {{q[0-7]}}, {{q[0-7]}}
+ %0 = tail call <4 x i32> asm "vcvt.s32.f32 $0, $1", "=t,t"(<4 x float> %x)
+ ret <4 x i32> %0
+}
+
+define <2 x float> @t-constraint-float-vector-64bit(<2 x float> %a, <2 x float> %b) {
+entry:
+ ; CHECK-LABEL: t-constraint-float-vector-64bit
+ ; CHECK: vadd.f32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+ %0 = tail call <2 x float> asm "vadd.f32 $0, $1, $2", "=t,t,t"(<2 x float> %a, <2 x float> %b)
+ ret <2 x float> %0
+}
+
+define <4 x float> @t-constraint-float-vector-128bit(<4 x float> %a, <4 x float> %b) {
+entry:
+ ; CHECK-LABEL: t-constraint-float-vector-128bit
+ ; CHECK: vadd.f32 q{{[0-7]}}, q{{[0-7]}}, q{{[0-7]}}
+ %0 = tail call <4 x float> asm "vadd.f32 $0, $1, $2", "=t,t,t"(<4 x float> %a, <4 x float> %b)
+ ret <4 x float> %0
+}
OpenPOWER on IntegriCloud