author     Matt Arsenault <Matthew.Arsenault@amd.com>   2016-12-09 06:19:12 +0000
committer  Matt Arsenault <Matthew.Arsenault@amd.com>   2016-12-09 06:19:12 +0000
commit     27c062932a8c3b44fe5d4c4fdbc0310cc32b61c6 (patch)
tree       88180af32faea184738d4ee76428f3b7a0014fe5 /llvm/test/CodeGen/AMDGPU/add.i16.ll
parent     2c84f908afc851a1ba83c6a8471a62db81475ef8 (diff)
AMDGPU: Select i16 instructions to VOP3 forms
These were selected directly to the VOP2 form instead of
to VOP3, as the i32 instructions are. This fixes regressions in
future commits where an immediate isn't folded because it was
initially used as the second operand.
Because uniform 16-bit operations are promoted to i32, it's
difficult to write a simple test case where this matters. Fold
failures in SIFoldOperands here tend to be hidden by the commuting
and folding done in SIShrinkInstructions.
llvm-svn: 289189
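For context, the tests below keep the add on the VALU by deriving an operand from a divergent value (the workitem id), since a uniform i16 add would be promoted to i32. A minimal sketch of that pattern, with a hypothetical function name and immediate (neither is from this commit), where the constant starts out as the second operand:

declare i32 @llvm.amdgcn.workitem.id.x()

; Illustrative only: selecting the add to VOP3 first leaves SIFoldOperands
; free to commute the operands and fold the immediate; SIShrinkInstructions
; can then shrink the result back to the VOP2 encoding (v_add_u16_e32).
define void @v_add_i16_imm(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr i16, i16 addrspace(1)* %in, i32 %tid
  %a = load i16, i16 addrspace(1)* %gep
  %add = add i16 %a, 123
  store i16 %add, i16 addrspace(1)* %out
  ret void
}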
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/add.i16.ll')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/add.i16.ll  10
1 file changed, 5 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/add.i16.ll b/llvm/test/CodeGen/AMDGPU/add.i16.ll
index 3c7a2c1f897..a41d3071377 100644
--- a/llvm/test/CodeGen/AMDGPU/add.i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.i16.ll
@@ -4,7 +4,7 @@
 ; GCN-LABEL: {{^}}v_test_add_i16:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
 ; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 ; VI-NEXT: buffer_store_short [[ADD]]
 define void @v_test_add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -67,7 +67,7 @@ define void @v_test_add_i16_inline_neg1(i16 addrspace(1)* %out, i16 addrspace(1)
 ; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i32:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
 ; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 ; VI-NEXT: buffer_store_dword [[ADD]]
 define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
@@ -86,7 +86,7 @@ define void @v_test_add_i16_zext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)
 ; GCN-LABEL: {{^}}v_test_add_i16_zext_to_i64:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
 ; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[A]], [[B]]
+; VI-DAG: v_add_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
 ; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
 ; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
 define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -106,7 +106,7 @@ define void @v_test_add_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)
 ; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i32:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
 ; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 ; VI-NEXT: v_bfe_i32 [[SEXT:v[0-9]+]], [[ADD]], 0, 16
 ; VI-NEXT: buffer_store_dword [[SEXT]]
 define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
@@ -126,7 +126,7 @@ define void @v_test_add_i16_sext_to_i32(i32 addrspace(1)
 ; GCN-LABEL: {{^}}v_test_add_i16_sext_to_i64:
 ; VI: flat_load_ushort [[A:v[0-9]+]]
 ; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
+; VI: v_add_u16_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
 ; VI-NEXT: v_bfe_i32 v[[LO:[0-9]+]], [[ADD]], 0, 16
 ; VI-NEXT: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
 ; VI-NEXT: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
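The only change in each check line above is operand order: v_add_u16_e32 now expects [[B]], [[A]] instead of [[A]], [[B]], consistent with the add being selected to the VOP3 form and later shrunk (and commuted) back to VOP2 by SIShrinkInstructions. A rough sketch of the two encodings, with illustrative operands not taken from the test:

; VOP2 (32-bit encoding, _e32): src1 must be a VGPR, so a constant can
; only be placed in src0.
v_add_u16_e32 v0, 0x64, v1
; VOP3 (64-bit encoding, _e64): operand placement is more flexible, and
; SIShrinkInstructions can later shrink this back to VOP2, commuting the
; operands if needed.
v_add_u16_e64 v0, v1, v2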