summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniil Fukalov <daniil.fukalov@amd.com>2018-06-08 16:29:04 +0000
committerDaniil Fukalov <daniil.fukalov@amd.com>2018-06-08 16:29:04 +0000
commitc9a098b314ecb206f439aa70a893c1620fb2e96b (patch)
tree816478d0b524bf8a63e190f5f12cf64fad757731
parent37433dc2e143ddface44d01611b5a82455874b99 (diff)
downloadbcm5719-llvm-c9a098b314ecb206f439aa70a893c1620fb2e96b.tar.gz
bcm5719-llvm-c9a098b314ecb206f439aa70a893c1620fb2e96b.zip
[AMDGPU] Inline asm - added i16, half and i128 types support
AMDGPU inline assembler support i16, half and i128 typed variables in constraints, but they were reported as error. Needed to fix https://github.com/RadeonOpenCompute/ROCm/issues/341, e.g. to be able to load with global_load_dwordx4 to a 128bit integer variable Differential Revision: https://reviews.llvm.org/D44920 llvm-svn: 334301
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp48
-rw-r--r--llvm/test/CodeGen/AMDGPU/inline-constraints.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/inlineasm-16.ll22
-rw-r--r--llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll47
4 files changed, 65 insertions, 70 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5b090ccf4a6..055b63ccfc4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7779,11 +7779,11 @@ std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- if (!isTypeLegal(VT))
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
-
+ const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ default:
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
case 's':
case 'r':
switch (VT.getSizeInBits()) {
@@ -7791,40 +7791,56 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, nullptr);
case 32:
case 16:
- return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
+ RC = &AMDGPU::SReg_32_XM0RegClass;
+ break;
case 64:
- return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
+ RC = &AMDGPU::SGPR_64RegClass;
+ break;
case 128:
- return std::make_pair(0U, &AMDGPU::SReg_128RegClass);
+ RC = &AMDGPU::SReg_128RegClass;
+ break;
case 256:
- return std::make_pair(0U, &AMDGPU::SReg_256RegClass);
+ RC = &AMDGPU::SReg_256RegClass;
+ break;
case 512:
- return std::make_pair(0U, &AMDGPU::SReg_512RegClass);
+ RC = &AMDGPU::SReg_512RegClass;
+ break;
}
-
+ break;
case 'v':
switch (VT.getSizeInBits()) {
default:
return std::make_pair(0U, nullptr);
case 32:
case 16:
- return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
+ RC = &AMDGPU::VGPR_32RegClass;
+ break;
case 64:
- return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
+ RC = &AMDGPU::VReg_64RegClass;
+ break;
case 96:
- return std::make_pair(0U, &AMDGPU::VReg_96RegClass);
+ RC = &AMDGPU::VReg_96RegClass;
+ break;
case 128:
- return std::make_pair(0U, &AMDGPU::VReg_128RegClass);
+ RC = &AMDGPU::VReg_128RegClass;
+ break;
case 256:
- return std::make_pair(0U, &AMDGPU::VReg_256RegClass);
+ RC = &AMDGPU::VReg_256RegClass;
+ break;
case 512:
- return std::make_pair(0U, &AMDGPU::VReg_512RegClass);
+ RC = &AMDGPU::VReg_512RegClass;
+ break;
}
+ break;
}
+ // We actually support i128, i16 and f16 as inline parameters
+ // even if they are not reported as legal
+ if (RC && (isTypeLegal(VT) || VT.SimpleTy == MVT::i128 ||
+ VT.SimpleTy == MVT::i16 || VT.SimpleTy == MVT::f16))
+ return std::make_pair(0U, RC);
}
if (Constraint.size() > 1) {
- const TargetRegisterClass *RC = nullptr;
if (Constraint[1] == 'v') {
RC = &AMDGPU::VGPR_32RegClass;
} else if (Constraint[1] == 's') {
diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
index 941a1b90dcc..e923739ca8f 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -4,20 +4,28 @@
; GCN-LABEL: {{^}}inline_reg_constraints:
; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
+; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
+; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]
define amdgpu_kernel void @inline_reg_constraints(i32 addrspace(1)* %ptr) {
entry:
%v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
- %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
- %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
- %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %v2_32 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v64 = tail call i64 asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v4_32 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %v128 = tail call i128 asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr)
+ %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s32_2 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s64 = tail call i64 asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s4_32 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
+ %s128 = tail call i128 asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
%s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
index 15e57fe6bff..bf235c5c6b0 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-16.ll
@@ -1,10 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
-; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=SICI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -enable-var-scope -check-prefix=GCN %s
; GCN-LABEL: {{^}}s_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
define amdgpu_kernel void @s_input_output_i16() #0 {
%v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
tail call void asm sideeffect "; use $0", "s"(i16 %v) #0
@@ -12,8 +12,8 @@ define amdgpu_kernel void @s_input_output_i16() #0 {
}
; GCN-LABEL: {{^}}v_input_output_i16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
define amdgpu_kernel void @v_input_output_i16() #0 {
%v = tail call i16 asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
tail call void asm sideeffect "; use $0", "v"(i16 %v)
@@ -21,8 +21,8 @@ define amdgpu_kernel void @v_input_output_i16() #0 {
}
; GCN-LABEL: {{^}}s_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 's'
-; SICI: error: couldn't allocate input reg for constraint 's'
+; GCN: s_mov_b32 s[[REG:[0-9]+]], -1
+; GCN: ; use s[[REG]]
define amdgpu_kernel void @s_input_output_f16() #0 {
%v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"() #0
tail call void asm sideeffect "; use $0", "s"(half %v)
@@ -30,8 +30,8 @@ define amdgpu_kernel void @s_input_output_f16() #0 {
}
; GCN-LABEL: {{^}}v_input_output_f16:
-; SICI: error: couldn't allocate output register for constraint 'v'
-; SICI: error: couldn't allocate input reg for constraint 'v'
+; GCN: v_mov_b32 v[[REG:[0-9]+]], -1
+; GCN: ; use v[[REG]]
define amdgpu_kernel void @v_input_output_f16() #0 {
%v = tail call half asm sideeffect "v_mov_b32 $0, -1", "=v"() #0
tail call void asm sideeffect "; use $0", "v"(half %v)
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
index ae736f53378..cda3f487138 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
@@ -1,5 +1,6 @@
-; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=CI %s
-; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: not llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
+; RUN: not llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN %s
+; RUN: not llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=GCN -check-prefix=SICI %s
; GCN: error: couldn't allocate output register for constraint 's'
; GCN: error: couldn't allocate input reg for constraint 's'
@@ -19,32 +20,14 @@ define amdgpu_kernel void @v_input_output_i8() {
; GCN: error: couldn't allocate output register for constraint 's'
; GCN: error: couldn't allocate input reg for constraint 's'
-define amdgpu_kernel void @s_input_output_i128() {
- %v = tail call i128 asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(i128 %v)
- ret void
-}
-
-; GCN: error: couldn't allocate output register for constraint 's'
-; GCN: error: couldn't allocate input reg for constraint 's'
define amdgpu_kernel void @s_input_output_v8f16() {
%v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
tail call void asm sideeffect "; use $0", "s"(<8 x half> %v)
ret void
}
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_f16() {
- %v = tail call half asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(half %v)
- ret void
-}
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
; VI-NOT: error
define amdgpu_kernel void @s_input_output_v2f16() {
%v = tail call <2 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
@@ -52,8 +35,8 @@ define amdgpu_kernel void @s_input_output_v2f16() {
ret void
}
-; CI: error: couldn't allocate output register for constraint 'v'
-; CI: error: couldn't allocate input reg for constraint 'v'
+; SICI: error: couldn't allocate output register for constraint 'v'
+; SICI: error: couldn't allocate input reg for constraint 'v'
; VI-NOT: error
define amdgpu_kernel void @v_input_output_v2f16() {
%v = tail call <2 x half> asm sideeffect "v_mov_b32 $0, -1", "=v"()
@@ -61,20 +44,8 @@ define amdgpu_kernel void @v_input_output_v2f16() {
ret void
}
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-; VI-NOT: error
-define amdgpu_kernel void @s_input_output_i16() {
- %v = tail call i16 asm sideeffect "s_mov_b32 $0, -1", "=s"()
- tail call void asm sideeffect "; use $0", "s"(i16 %v)
- ret void
-}
-
-; FIXME: Should work on all targets?
-
-; CI: error: couldn't allocate output register for constraint 's'
-; CI: error: couldn't allocate input reg for constraint 's'
-
+; SICI: error: couldn't allocate output register for constraint 's'
+; SICI: error: couldn't allocate input reg for constraint 's'
; VI-NOT: error
define amdgpu_kernel void @s_input_output_v2i16() {
%v = tail call <2 x i16> asm sideeffect "s_mov_b32 $0, -1", "=s"()
OpenPOWER on IntegriCloud