diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/bfe-patterns.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/bfe-patterns.ll | 36 |
1 files changed, 16 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index a33ab4e379d..69237cfabb8 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}v_ubfe_sub_i32: ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]] @@ -48,10 +48,9 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, } ; GCN-LABEL: {{^}}s_ubfe_sub_i32: -; GCN: s_load_dword [[SRC:s[0-9]+]] -; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]] -; GCN: v_bfe_u32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]] +; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}} +; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]] +; GCN: v_bfe_u32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]] define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x @@ -63,11 +62,10 @@ define amdgpu_kernel void @s_ubfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 } ; GCN-LABEL: {{^}}s_ubfe_sub_multi_use_shl_i32: -; GCN: s_load_dword [[SRC:s[0-9]+]] -; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]] -; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]] -; GCN-NEXT: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]] +; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}} +; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]] +; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]] +; GCN: s_lshr_b32 s{{[0-9]+}}, [[SHL]], [[SUB]] define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x @@ -126,10 +124,9 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, } ; GCN-LABEL: {{^}}s_sbfe_sub_i32: -; GCN: s_load_dword [[SRC:s[0-9]+]] -; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], [[WIDTH]] -; GCN: v_bfe_i32 v{{[0-9]+}}, [[SRC]], 0, [[VWIDTH]] +; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}} +; GCN: v_mov_b32_e32 [[VWIDTH:v[0-9]+]], s[[WIDTH]] +; GCN: v_bfe_i32 v{{[0-9]+}}, s[[SRC]], 0, [[VWIDTH]] define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x @@ -141,11 +138,10 @@ define amdgpu_kernel void @s_sbfe_sub_i32(i32 addrspace(1)* %out, i32 %src, i32 } ; GCN-LABEL: {{^}}s_sbfe_sub_multi_use_shl_i32: -; GCN: s_load_dword [[SRC:s[0-9]+]] -; GCN: s_load_dword [[WIDTH:s[0-9]+]] -; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, [[WIDTH]] -; GCN-NEXT: s_lshl_b32 [[SHL:s[0-9]+]], [[SRC]], [[SUB]] -; GCN-NEXT: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]] +; GCN: s_load_dwordx2 s{{\[}}[[SRC:[0-9]+]]:[[WIDTH:[0-9]+]]{{\]}}, s[0:1], {{0xb|0x2c}} +; GCN: s_sub_i32 [[SUB:s[0-9]+]], 32, s[[WIDTH]] +; GCN: s_lshl_b32 [[SHL:s[0-9]+]], s[[SRC]], [[SUB]] +; GCN: s_ashr_i32 s{{[0-9]+}}, [[SHL]], [[SUB]] define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(i32 addrspace(1)* %out, i32 %src, i32 %width) #1 { %id.x = tail call i32 @llvm.amdgcn.workitem.id.x() %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %id.x |