diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-30 22:51:26 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-11-30 22:51:26 +0000 |
| commit | 84445dd13c4b3b783e63ff9ebd5871b1ae7386d8 (patch) | |
| tree | 0be329bb21a8eb142540840cc66191abaee437bf /llvm/test/CodeGen/AMDGPU/sibling-call.ll | |
| parent | ba4014e9dce96618ab4d8f820447df2a86023b74 (diff) | |
| download | bcm5719-llvm-84445dd13c4b3b783e63ff9ebd5871b1ae7386d8.tar.gz bcm5719-llvm-84445dd13c4b3b783e63ff9ebd5871b1ae7386d8.zip | |
AMDGPU: Use gfx9 carry-less add/sub instructions
llvm-svn: 319491
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/sibling-call.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/sibling-call.ll | 31 |
1 files changed, 22 insertions, 9 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 344c05f56cf..f7e8a1d80e9 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -1,11 +1,13 @@ -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=fiji -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=hawaii -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI,MESA %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa-amdgiz -mcpu=gfx900 -mattr=-flat-for-global -enable-ipra=0 -amdgpu-sroa=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,MESA %s target datalayout = "A5" +; FIXME: Why is this commuted only sometimes? ; GCN-LABEL: {{^}}i32_fastcc_i32_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { %add0 = add i32 %arg0, %arg1 @@ -14,7 +16,8 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 ; GCN: s_mov_b32 s5, s32 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24 ; GCN: s_waitcnt vmcnt(0) @@ -84,7 +87,10 @@ entry: ; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 + +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 + ; GCN-NEXT: s_setpc_b64 s[30:31] define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32 addrspace(5)* byval align 4 %arg1) #1 { %arg1.load = load i32, i32 addrspace(5)* %arg1, align 4 @@ -123,9 +129,16 @@ entry: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4 ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8 -; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0 -; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0 + +; CIVI-NEXT: v_add_{{i|u}}32_e32 v0, vcc, v1, v0 +; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_0]], v0 +; CIVI: v_add_{{i|u}}32_e32 v0, vcc, [[LOAD_1]], v0 + + +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9: v_add_u32_e32 v0, v0, [[LOAD_0]] +; GFX9: v_add_u32_e32 v0, v0, [[LOAD_1]] + ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { %val_firststack = extractvalue [32 x i32] %large, 30 |

