author:    Matt Arsenault <Matthew.Arsenault@amd.com>  2015-08-29 07:16:50 +0000
committer: Matt Arsenault <Matthew.Arsenault@amd.com>  2015-08-29 07:16:50 +0000
commit:    e4d0c142e8e9cf8b456c06f5ce9641d89b67ac86
tree:      5476ba4a50871c8be1f624710b2c042bfe6fc32f /llvm/test
parent:    9a32cd3d3b79f94aa7e9d016a5e6eeb2650dd8a8
AMDGPU: Add sdst operand to VOP2b instructions
The VOP3 encoding of these instructions allows any SGPR pair for the
i1 output, but it was previously forced to always be vcc.

This patch does not yet make use of that flexibility; it only adds the
operand to the instruction definitions, so the main visible change is
that vcc now appears explicitly in the output of the VOP2 encoding.
llvm-svn: 246358
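To make the effect concrete, here is the assembler syntax exercised by the vop2.s and vop2-err.s updates in this patch; the exact register choices (v1, v2, v3, s[0:1]) are simply the ones those tests happen to use:

```
// VOP2 (_e32) form: the carry-out is still vcc, but it is now spelled as an explicit operand.
v_add_i32_e32 v1, vcc, v2, v3

// VOP3 (_e64) form: the sdst operand may be any SGPR pair, not just vcc.
v_add_i32_e64 v1, s[0:1], v2, v3

// An SGPR pair is not valid on the _e32 form; the new vop2-err.s case checks that
// "v_add_i32_e32 v1, s[0:1], v2, v3" is rejected with "invalid operand for instruction".
```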
Diffstat (limited to 'llvm/test')
llvm/test/CodeGen/AMDGPU/add.ll | 14
llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll | 4
llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 2
llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll | 8
llvm/test/CodeGen/AMDGPU/ds_read2st64.ll | 4
llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll | 2
llvm/test/CodeGen/AMDGPU/local-memory-two-objects.ll | 2
llvm/test/CodeGen/AMDGPU/operand-folding.ll | 2
llvm/test/CodeGen/AMDGPU/scratch-buffer.ll | 2
llvm/test/CodeGen/AMDGPU/shl_add_constant.ll | 6
llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll | 2
llvm/test/CodeGen/AMDGPU/sub.ll | 14
llvm/test/CodeGen/AMDGPU/udivrem.ll | 56
llvm/test/CodeGen/AMDGPU/vop-shrink.ll | 4
llvm/test/MC/AMDGPU/vop2-err.s | 3
llvm/test/MC/AMDGPU/vop2.s | 98
16 files changed, 135 insertions, 88 deletions
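Most of the CodeGen test updates below are the same mechanical change: FileCheck patterns for carry-writing VOP2 instructions gain an explicit vcc operand right after the destination register. A representative before/after pair, taken from the add.ll hunk that follows:

```
; Old check line (carry output not matched):
;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
; Updated check line (vcc matched explicitly):
;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
```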
diff --git a/llvm/test/CodeGen/AMDGPU/add.ll b/llvm/test/CodeGen/AMDGPU/add.ll
index 655e75dbc1a..2ddfa9649ac 100644
--- a/llvm/test/CodeGen/AMDGPU/add.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.ll
@@ -5,7 +5,7 @@
;FUNC-LABEL: {{^}}test1:
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 [[REG:v[0-9]+]], {{v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 [[REG:v[0-9]+]], vcc, {{v[0-9]+, v[0-9]+}}
;SI-NOT: [[REG]]
;SI: buffer_store_dword [[REG]],
define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
@@ -21,8 +21,8 @@ define void @test1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -39,10 +39,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: ADD_INT {{[* ]*}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-;SI: v_add_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+;SI: v_add_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
index 8c2a0795860..67738f7daff 100644
--- a/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/array-ptr-calc-i32.ll
@@ -14,7 +14,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; FIXME: We end up with zero argument for ADD, because
; SIRegisterInfo::eliminateFrameIndex() blindly replaces the frame index
; with the appropriate offset. We should fold this into the store.
-; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], 0, v{{[0-9]+}}
+; SI-ALLOCA: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 0, v{{[0-9]+}}
; SI-ALLOCA: buffer_store_dword {{v[0-9]+}}, [[PTRREG]], s[{{[0-9]+:[0-9]+}}]
;
; FIXME: The AMDGPUPromoteAlloca pass should be able to convert this
@@ -22,7 +22,7 @@ declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
; to interpret:
; getelementptr [4 x i32], [4 x i32]* %alloca, i32 1, i32 %b
-; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], 16
+; SI-PROMOTE: v_add_i32_e32 [[PTRREG:v[0-9]+]], vcc, 16
; SI-PROMOTE: ds_write_b32 [[PTRREG]]
define void @test_private_array_ptr_calc(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %inA, i32 addrspace(1)* noalias %inB) {
%alloca = alloca [4 x i32], i32 4, align 16
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index 3399d9da29e..cd554ba256b 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -154,7 +154,7 @@ define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8>
; SI-LABEL: {{^}}i8_zext_inreg_i32_to_f32:
; SI: buffer_load_dword [[LOADREG:v[0-9]+]],
-; SI: v_add_i32_e32 [[ADD:v[0-9]+]], 2, [[LOADREG]]
+; SI: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]]
; SI-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]]
; SI: buffer_store_dword [[CONV]],
define void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
index 5e4654abd91..ac17df11970 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-negative-offset-addressing-mode-loop.ll
@@ -10,13 +10,13 @@ declare void @llvm.AMDGPU.barrier.local() #1
; CHECK: BB0_1:
; CHECK: v_add_i32_e32 [[VADDR:v[0-9]+]],
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR]]
-; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], 4, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR4:v[0-9]+]], vcc, 4, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR4]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], 0x80, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x80:v[0-9]+]], vcc, 0x80, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x80]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], 0x84, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x84:v[0-9]+]], vcc, 0x84, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x84]]
-; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], 0x100, [[VADDR]]
+; SI-DAG: v_add_i32_e32 [[VADDR0x100:v[0-9]+]], vcc, 0x100, [[VADDR]]
; SI-DAG: ds_read_b32 v{{[0-9]+}}, [[VADDR0x100]]
; CI-DAG: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, [[VADDR]] offset1:1
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
index e2e441214b4..00de55be852 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2st64.ll
@@ -65,7 +65,7 @@ define void @simple_read2st64_f32_max_offset(float addrspace(1)* %out, float add
; SI-LABEL: @simple_read2st64_f32_over_max_offset
; SI-NOT: ds_read2st64_b32
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, {{v[0-9]+}} offset:256
; SI: ds_read_b32 {{v[0-9]+}}, [[BIGADD]]
; SI: s_endpgm
@@ -197,7 +197,7 @@ define void @simple_read2st64_f64_max_offset(double addrspace(1)* %out, double a
; SI-LABEL: @simple_read2st64_f64_over_max_offset
; SI-NOT: ds_read2st64_b64
-; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], 0x10000, {{v[0-9]+}}
+; SI: v_add_i32_e32 [[BIGADD:v[0-9]+]], vcc, 0x10000, {{v[0-9]+}}
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}} offset:512
; SI: ds_read_b64 {{v\[[0-9]+:[0-9]+\]}}, [[BIGADD]]
; SI: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
index 1168713ca66..d56b4845728 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.bfe.i32.ll
@@ -425,7 +425,7 @@ define void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
; SI: buffer_load_dword [[LOAD:v[0-9]+]]
; SI: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16
; SI: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]]
-; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], [[TMP0]], [[BFE]]
+; SI: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]]
; SI: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]]
; SI: buffer_store_dword [[TMP2]]
define void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
diff --git a/llvm/test/CodeGen/AMDGPU/local-memory-two-objects.ll b/llvm/test/CodeGen/AMDGPU/local-memory-two-objects.ll
index f501a7ac627..7f31ef45b62 100644
--- a/llvm/test/CodeGen/AMDGPU/local-memory-two-objects.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-memory-two-objects.ll
@@ -30,7 +30,7 @@
; constant offsets.
; EG: LDS_READ_RET {{[*]*}} OQAP, {{PV|T}}[[ADDRR:[0-9]*\.[XYZW]]]
; EG-NOT: LDS_READ_RET {{[*]*}} OQAP, T[[ADDRR]]
-; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], 16, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[SIPTR:v[0-9]+]], vcc, 16, v{{[0-9]+}}
; SI: ds_read_b32 {{v[0-9]+}}, [[SIPTR]]
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR:v[0-9]+]] offset:16
; CI: ds_read_b32 {{v[0-9]+}}, [[ADDRR]]
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index 816755efb07..9e514ef9970 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
; CHECK-LABEL: {{^}}fold_sgpr:
-; CHECK: v_add_i32_e32 v{{[0-9]+}}, s
+; CHECK: v_add_i32_e32 v{{[0-9]+}}, vcc, s
define void @fold_sgpr(i32 addrspace(1)* %out, i32 %fold) {
entry:
%tmp0 = icmp ne i32 %fold, 0
diff --git a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
index 268869daaa3..d43de476605 100644
--- a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
+++ b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll
@@ -51,7 +51,7 @@ done:
; GCN-LABEL: {{^}}legal_offset_fi_offset
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen
-; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], 0x8000
+; GCN: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, 0x8000
; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}}
define void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) {
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
index b1485bfaaeb..dfb2bf3383f 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_constant.ll
@@ -6,7 +6,7 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: {{^}}shl_2_add_9_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 36, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -20,7 +20,7 @@ define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
}
; FUNC-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
-; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], 9, {{v[0-9]+}}
+; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
@@ -40,7 +40,7 @@ define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1
; FUNC-LABEL: {{^}}shl_2_add_999_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
-; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], 0xf9c, [[REG]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
index 6671e909cd1..ac94824bd61 100644
--- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll
@@ -35,7 +35,7 @@ define void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %ad
; SI-LABEL: {{^}}load_shl_base_lds_1:
; SI: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
; SI: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
-; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], 8, v{{[0-9]+}}
+; SI: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
; SI-DAG: buffer_store_dword [[RESULT]]
; SI-DAG: buffer_store_dword [[ADDUSE]]
; SI: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/sub.ll b/llvm/test/CodeGen/AMDGPU/sub.ll
index b7fba0efa5b..9f9446a4e60 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.ll
@@ -7,7 +7,7 @@ declare i32 @llvm.r600.read.tidig.x() readnone
; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_subrev_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_subrev_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1)* %in
@@ -22,8 +22,8 @@ define void @test_sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -40,10 +40,10 @@ define void @test_sub_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
-; SI: v_sub_i32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
+; SI: v_sub_i32_e32 v{{[0-9]+, vcc, v[0-9]+, v[0-9]+}}
define void @test_sub_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll
index b3837f28209..dc3e2e30297 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll
@@ -30,19 +30,19 @@
; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
-; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], 0, [[RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
-; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], [[E]], [[RCP]]
-; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], [[E]], [[RCP]]
+; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
+; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
; SI: v_cndmask_b32_e64
; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], {{[vs][0-9]+}}, [[Num_S_Remainder]]
+; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
-; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], 1, [[Quotient]]
+; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]],
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
@@ -110,15 +110,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -133,15 +133,15 @@ define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -260,15 +260,15 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_rcp_iflag_f32_e32 [[FIRST_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FIRST_RCP_HI:v[0-9]+]], [[FIRST_RCP]]
; SI-DAG: v_mul_lo_i32 [[FIRST_RCP_LO:v[0-9]+]], [[FIRST_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], 0, [[FIRST_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FIRST_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FIRST_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_E:v[0-9]+]], {{v[0-9]+}}, [[FIRST_RCP]]
-; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_add_i32_e32 [[FIRST_RCP_A_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FIRST_RCP_S_E:v[0-9]+]], vcc, [[FIRST_E]], [[FIRST_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FIRST_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[FIRST_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[FIRST_Remainder:v[l0-9]+]], vcc, [[FIRST_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[FIRST_Tmp1:v[0-9]+]]
@@ -283,15 +283,15 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_rcp_iflag_f32_e32 [[SECOND_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[SECOND_RCP_HI:v[0-9]+]], [[SECOND_RCP]]
; SI-DAG: v_mul_lo_i32 [[SECOND_RCP_LO:v[0-9]+]], [[SECOND_RCP]]
-; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], 0, [[SECOND_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[SECOND_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[SECOND_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_E:v[0-9]+]], {{v[0-9]+}}, [[SECOND_RCP]]
-; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_add_i32_e32 [[SECOND_RCP_A_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[SECOND_RCP_S_E:v[0-9]+]], vcc, [[SECOND_E]], [[SECOND_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[SECOND_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[SECOND_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[SECOND_Remainder:v[0-9]+]], vcc, [[SECOND_Num_S_Remainder]], v{{[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[SECOND_Tmp1:v[0-9]+]]
@@ -306,15 +306,15 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_rcp_iflag_f32_e32 [[THIRD_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[THIRD_RCP_HI:v[0-9]+]], [[THIRD_RCP]]
; SI-DAG: v_mul_lo_i32 [[THIRD_RCP_LO:v[0-9]+]], [[THIRD_RCP]]
-; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], 0, [[THIRD_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[THIRD_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[THIRD_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[THIRD_E:v[0-9]+]], {{v[0-9]+}}, [[THIRD_RCP]]
-; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_add_i32_e32 [[THIRD_RCP_A_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[THIRD_RCP_S_E:v[0-9]+]], vcc, [[THIRD_E]], [[THIRD_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[THIRD_Quotient:v[0-9]+]]
; SI-DAG: v_mul_lo_i32 [[THIRD_Num_S_Remainder:v[0-9]+]]
-; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
+; SI-DAG: v_subrev_i32_e32 [[THIRD_Remainder:v[0-9]+]], vcc, [[THIRD_Num_S_Remainder]], {{v[0-9]+}}
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_and_b32_e32 [[THIRD_Tmp1:v[0-9]+]]
@@ -329,11 +329,11 @@ define void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i3
; SI-DAG: v_rcp_iflag_f32_e32 [[FOURTH_RCP:v[0-9]+]]
; SI-DAG: v_mul_hi_u32 [[FOURTH_RCP_HI:v[0-9]+]], [[FOURTH_RCP]]
; SI-DAG: v_mul_lo_i32 [[FOURTH_RCP_LO:v[0-9]+]], [[FOURTH_RCP]]
-; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], 0, [[FOURTH_RCP_LO]]
+; SI-DAG: v_sub_i32_e32 [[FOURTH_NEG_RCP_LO:v[0-9]+]], vcc, 0, [[FOURTH_RCP_LO]]
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_mul_hi_u32 [[FOURTH_E:v[0-9]+]], {{v[0-9]+}}, [[FOURTH_RCP]]
-; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
-; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_add_i32_e32 [[FOURTH_RCP_A_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
+; SI-DAG: v_subrev_i32_e32 [[FOURTH_RCP_S_E:v[0-9]+]], vcc, [[FOURTH_E]], [[FOURTH_RCP]]
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
define void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
index 9b2f229c05a..2bfe1b2bd6e 100644
--- a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
+++ b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll
@@ -3,8 +3,8 @@
; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
-; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, s
-; SI: v_subrev_i32_e32 v{{[0-9]+}}, s
+; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
+; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s
; ModuleID = 'vop-shrink.ll'
diff --git a/llvm/test/MC/AMDGPU/vop2-err.s b/llvm/test/MC/AMDGPU/vop2-err.s
index a1131000a90..47d7d5bbecb 100644
--- a/llvm/test/MC/AMDGPU/vop2-err.s
+++ b/llvm/test/MC/AMDGPU/vop2-err.s
@@ -32,4 +32,7 @@ v_mul_i32_i24_e64 v1, 100, v3
v_mul_i32_i24_e64 v1, v2, 100
// CHECK: error: invalid operand for instruction

+v_add_i32_e32 v1, s[0:1], v2, v3
+// CHECK: error: invalid operand for instruction
+
// TODO: Constant bus restrictions
diff --git a/llvm/test/MC/AMDGPU/vop2.s b/llvm/test/MC/AMDGPU/vop2.s
index a1f3b8d8936..0a875608464 100644
--- a/llvm/test/MC/AMDGPU/vop2.s
+++ b/llvm/test/MC/AMDGPU/vop2.s
@@ -251,41 +251,85 @@ v_mbcnt_lo_u32_b32 v1, v2, v3
// VI: v_mbcnt_hi_u32_b32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x8d,0xd2,0x02,0x07,0x02,0x00]
v_mbcnt_hi_u32_b32 v1, v2, v3

-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_i32 v1, vcc, v2, v3

-// SICI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
-// VI: v_add_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
-v_add_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32 v1, s[0:1], v2, v3

-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_i32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, s[0:1], v2, v3

-// SICI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
-// VI: v_sub_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
-v_sub_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, vcc, v2, v3 ; encoding: [0x01,0x6a,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_i32_e64 v1, vcc, v2, v3

-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_i32 v1, v2, v3
+// SICI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4a]
+// VI: v_add_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x32]
+v_add_u32 v1, vcc, v2, v3

-// SICI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
-// VI: v_subrev_i32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
-v_subrev_u32 v1, v2, v3
+// SICI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4a,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_add_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x19,0xd1,0x02,0x07,0x02,0x00]
+v_add_u32 v1, s[0:1], v2, v3

-// SICI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
-// VI: v_addc_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
-v_addc_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_i32 v1, vcc, v2, v3

-// SICI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
-// VI: v_subb_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
-v_subb_u32 v1, v2, v3
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_i32 v1, s[0:1], v2, v3

-// SICI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
-// VI: v_subbrev_u32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
-v_subbrev_u32 v1, v2, v3
+// SICI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4c]
+// VI: v_sub_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x34]
+v_sub_u32 v1, vcc, v2, v3
+
+// SICI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4c,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_sub_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1a,0xd1,0x02,0x07,0x02,0x00]
+v_sub_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_i32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_i32 v1, s[0:1], v2, v3
+
+// SICI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x4e]
+// VI: v_subrev_i32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x36]
+v_subrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x4e,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subrev_i32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1b,0xd1,0x02,0x07,0x02,0x00]
+v_subrev_u32 v1, s[0:1], v2, v3
+
+// SICI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x50]
+// VI: v_addc_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x38]
+v_addc_u32 v1, vcc, v2, v3
+
+// SICI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x50,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_addc_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1c,0xd1,0x02,0x07,0x02,0x00]
+v_addc_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x52]
+// VI: v_subb_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3a]
+v_subb_u32 v1, vcc, v2, v3
+
+// SICI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x52,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subb_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1d,0xd1,0x02,0x07,0x02,0x00]
+v_subb_u32 v1, s[0:1], v2, v3
+
+// SICI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x54]
+// VI: v_subbrev_u32_e32 v1, vcc, v2, v3 ; encoding: [0x02,0x07,0x02,0x3c]
+v_subbrev_u32 v1, vcc, v2, v3
+
+// SICI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x54,0xd2,0x02,0x07,0x02,0x00]
+// VI: v_subbrev_u32_e64 v1, s[0:1], v2, v3 ; encoding: [0x01,0x00,0x1e,0xd1,0x02,0x07,0x02,0x00]
+v_subbrev_u32 v1, s[0:1], v2, v3

// SICI: v_ldexp_f32_e32 v1, v2, v3 ; encoding: [0x02,0x07,0x02,0x56]
// VI: v_ldexp_f32_e64 v1, v2, v3 ; encoding: [0x01,0x00,0x88,0xd2,0x02,0x07,0x02,0x00]