summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/sibling-call.ll
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2017-11-30 22:51:26 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2017-11-30 22:51:26 +0000
commit84445dd13c4b3b783e63ff9ebd5871b1ae7386d8 (patch)
tree0be329bb21a8eb142540840cc66191abaee437bf /llvm/test/CodeGen/AMDGPU/sibling-call.ll
parentba4014e9dce96618ab4d8f820447df2a86023b74 (diff)
downloadbcm5719-llvm-84445dd13c4b3b783e63ff9ebd5871b1ae7386d8.tar.gz
bcm5719-llvm-84445dd13c4b3b783e63ff9ebd5871b1ae7386d8.zip
AMDGPU: Use gfx9 carry-less add/sub instructions
llvm-svn: 319491
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/sibling-call.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/sibling-call.ll31
1 files changed, 22 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
index 344c05f56cf..f7e8a1d80e9 100644
--- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll
@@ -1,11 +1,13 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s
target datalayout = "A5"
+; FIXME: Why is this commuted only sometimes?
; GCN-LABEL: {{^}}i32_fastcc_i32_i32:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
+; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64
define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
%add0 = add i32 %arg0, %arg1
@@ -14,7 +16,8 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 {
; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v
+; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24
; GCN: s_waitcnt vmcnt(0)
@@ -84,7 +87,10 @@ entry:
; GCN-NEXT: s_mov_b32 s5, s32
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
+
+; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
+
; GCN-NEXT: s_setpc_b64 s[30:31]
define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval align 4 %arg1) #1 {
%arg1.load = load i32, i32 addrspace(5)* %arg1, align 4
@@ -123,9 +129,16 @@ entry:
; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8
-; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0
-; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0
-; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0
+
+; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0
+; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_0]], v0
+; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_1]], v0
+
+
+; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
+; GFX9: v_add_u32_e32 v0, v0, [[LOAD_0]]
+; GFX9: v_add_u32_e32 v0, v0, [[LOAD_1]]
+
; GCN-NEXT: s_setpc_b64
define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 {
%val_firststack = extractvalue [32 x i32] %large, 30
OpenPOWER on IntegriCloud