diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
48 files changed, 299 insertions, 297 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll index a2f647cefd1..a89c1c21493 100644 --- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll @@ -52,8 +52,8 @@ define amdgpu_kernel void @s_test_add_self_v2i16(<2 x i16> addrspace(1)* %out, < ; GCN-LABEL: {{^}}s_test_add_v2i16_kernarg: ; GFX9: v_pk_add_u16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; VI: v_add_i32 -; VI: v_add_i32_sdwa +; VI: v_add_u32 +; VI: v_add_u32_sdwa define amdgpu_kernel void @s_test_add_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 { %add = add <2 x i16> %a, %b store <2 x i16> %add, <2 x i16> addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll index 79450b97c21..1d5c538427b 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgcn.private-memory.ll @@ -13,7 +13,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone ; GCN-LABEL: {{^}}work_item_info: ; GCN-NOT: v0 -; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, v0, v{{[0-9]+}} ; GCN: buffer_store_dword [[RESULT]] define amdgpu_kernel void @work_item_info(i32 addrspace(1)* %out, i32 %in) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll index 0ba8836b20d..4993b95d228 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-shader-calling-convention.ll @@ -3,7 +3,7 @@ ; GCN-LABEL: {{^}}shader_cc: -; GCN: v_add_i32_e32 v0, vcc, s8, v0 +; GCN: v_add_{{[iu]}}32_e32 v0, vcc, s8, v0 define amdgpu_cs float @shader_cc(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) { %vi = bitcast float %v to i32 %x = add i32 %vi, %w diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll index d91bf4aa310..71c4c83c28f 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu.private-memory.ll @@ -391,7 +391,7 @@ entry: ; FUNC-LABEL: ptrtoint: ; SI-NOT: ds_write ; SI: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen -; SI: v_add_i32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5, +; SI: v_add_{{[iu]}}32_e32 [[ADD_OFFSET:v[0-9]+]], vcc, 5, ; SI: buffer_load_dword v{{[0-9]+}}, [[ADD_OFFSET:v[0-9]+]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen ; define amdgpu_kernel void @ptrtoint(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 { %alloca = alloca [16 x i32] diff --git a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll index 6035e3bf4a5..cc0ae3ab430 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-combine.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-combine.ll @@ -9,8 +9,8 @@ ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 6, v{{[0-9]+}} ; CI: v_and_b32_e32 v[[ADDRLO:[0-9]+]], 0x3fc, v[[SHR]] -; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] -; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2 @@ -29,11 +29,11 @@ define amdgpu_kernel void @bfe_combine8(i32 addrspace(1)* nocapture %arg, i32 %x ; VI-SDWA: v_mov_b32_e32 v[[SHIFT:[0-9]+]], 15 ; VI-SDWA: v_lshlrev_b32_sdwa v[[ADDRBASE1:[0-9]+]], v[[SHIFT]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; VI-SDWA: v_lshlrev_b64 v{{\[}}[[ADDRBASE:[0-9]+]]:{{[^\]+}}], 2, v{{\[}}[[ADDRBASE1]]:{{[^\]+}}] -; VI-SDWA: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; VI-SDWA: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] ; CI: v_lshrrev_b32_e32 v[[SHR:[0-9]+]], 1, v{{[0-9]+}} ; CI: v_and_b32_e32 v[[AND:[0-9]+]], 0x7fff8000, v[[SHR]] ; CI: v_lshl_b64 v{{\[}}[[ADDRLO:[0-9]+]]:{{[^\]+}}], v{{\[}}[[AND]]:{{[^\]+}}], 2 -; VI: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] +; VI: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADDRBASE]] ; GCN: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: define amdgpu_kernel void @bfe_combine16(i32 addrspace(1)* nocapture %arg, i32 %x) { %id = tail call i32 @llvm.amdgcn.workitem.id.x() #2 diff --git a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll index 907c8c2216b..a33ab4e379d 100644 --- a/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll +++ b/llvm/test/CodeGen/AMDGPU/bfe-patterns.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace( ; GCN-LABEL: {{^}}v_ubfe_sub_multi_use_shl_i32: ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] -; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] +; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] ; SI-NEXT: v_lshr_b32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] @@ -100,7 +100,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(i32 addrspace(1)* %out, i32 addrspace( ; GCN-LABEL: {{^}}v_sbfe_sub_multi_use_shl_i32: ; GCN: {{buffer|flat}}_load_dword [[SRC:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[WIDTH:v[0-9]+]] -; GCN: v_sub_i32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] +; GCN: v_sub_{{[iu]}}32_e32 [[SUB:v[0-9]+]], vcc, 32, [[WIDTH]] ; SI-NEXT: v_lshl_b32_e32 [[SHL:v[0-9]+]], [[SRC]], [[SUB]] ; SI-NEXT: v_ashr_i32_e32 [[BFE:v[0-9]+]], [[SHL]], [[SUB]] diff --git a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll index 398c7d32aee..103b8c3a303 100644 --- a/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll +++ b/llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll @@ -38,11 +38,11 @@ entry: ; GCN-DAG: s_add_u32 s32, s32, 0xb00{{$}} ; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}} -; GCN: v_add_i32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] +; GCN: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]] ; GCN: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}} ; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}} -; GCN: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]] +; GCN: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]] ; GCN: s_swappc_b64 diff --git a/llvm/test/CodeGen/AMDGPU/ctlz.ll b/llvm/test/CodeGen/AMDGPU/ctlz.ll index a544cbe890b..40548ba4cb9 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz.ll @@ -112,7 +112,7 @@ define amdgpu_kernel void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 ; GCN: v_cndmask_b32_e32 [[SELECT:v[0-9]+]], 32, [[FFBH]], vcc ; SI: v_subrev_i32_e32 [[RESULT:v[0-9]+]], vcc, 24, [[SELECT]] -; VI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]] +; VI: v_add_u32_e32 [[RESULT:v[0-9]+]], vcc, -16, [[SELECT]] ; GCN: buffer_store_byte [[RESULT]], ; GCN: s_endpgm define amdgpu_kernel void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind { @@ -151,7 +151,7 @@ define amdgpu_kernel void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] -; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] +; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], vcc ; GCN-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]] diff --git a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll index 7500da53630..9796bc780ed 100644 --- a/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll +++ b/llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll @@ -126,7 +126,7 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias ; GCN-DAG: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}} ; GCN-DAG: v_cmp_eq_u32_e32 vcc, 0, v[[HI]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]] -; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] +; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]] ; GCN-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]] ; GCN-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]] ; GCN: {{buffer|flat}}_store_dwordx2 {{.*}}v{{\[}}[[CTLZ]]:[[CTLZ_HI:[0-9]+]]{{\]}} diff --git a/llvm/test/CodeGen/AMDGPU/ctpop64.ll b/llvm/test/CodeGen/AMDGPU/ctpop64.ll index 8d2dac9538f..298280f755f 100644 --- a/llvm/test/CodeGen/AMDGPU/ctpop64.ll +++ b/llvm/test/CodeGen/AMDGPU/ctpop64.ll @@ -188,7 +188,7 @@ define amdgpu_kernel void @s_ctpop_i65(i32 addrspace(1)* noalias %out, i65 %val) ; GCN-DAG: v_bcnt_u32_b32{{(_e64)*}} [[MIDRESULT2:v[0-9]+]], v[[VAL0]], 0 ; GCN-DAG: v_bcnt_u32_b32{{(_e32)*(_e64)*}} [[MIDRESULT3:v[0-9]+]], v{{[0-9]+}}, [[MIDRESULT2]] -; GCN: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]] +; GCN: v_add_{{[iu]}}32_e32 [[RESULT:v[0-9]+]], vcc, [[MIDRESULT1]], [[MIDRESULT2]] ; GCN: buffer_store_dword [[RESULT]], ; GCN: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 0600633aa27..c6f7e3ea19a 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -169,7 +169,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias ; GCN-LABEL: {{^}}i8_zext_inreg_i32_to_f32: ; GCN: {{buffer|flat}}_load_dword [[LOADREG:v[0-9]+]], -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]] +; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 2, [[LOADREG]] ; GCN-NEXT: v_cvt_f32_ubyte0_e32 [[CONV:v[0-9]+]], [[ADD]] ; GCN: buffer_store_dword [[CONV]], define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { diff --git a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll index a723b0210ad..cf6c297906b 100644 --- a/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll +++ b/llvm/test/CodeGen/AMDGPU/ds-combine-large-stride.ll @@ -4,12 +4,12 @@ ; GCN-LABEL: ds_read32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 @@ -46,12 +46,12 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_400_back: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:100 ; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:100 @@ -124,12 +124,12 @@ bb: ; GCN-LABEL: ds_read32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:32 ; GCN-DAG: ds_read2st64_b32 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:32 @@ -160,8 +160,8 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset1:50 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:100 offset1:150 ; GCN-DAG: ds_read2_b64 v[{{[0-9]+:[0-9]+}}], [[BASE]] offset0:200 offset1:250 @@ -198,12 +198,12 @@ bb: ; GCN-LABEL: ds_read64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B1]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B2]] offset1:16 ; GCN-DAG: ds_read2st64_b64 v[{{[0-9]+:[0-9]+}}], [[B3]] offset1:16 @@ -234,12 +234,12 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 @@ -267,12 +267,12 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_400_back: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x320, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x640, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_write2_b32 [[BASE]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 ; GCN-DAG: ds_write2_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:100 @@ -327,12 +327,12 @@ bb: ; GCN-LABEL: ds_write32_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 4, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4004, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8004, [[BASE]] ; GCN-DAG: ds_write2st64_b32 [[B1]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 ; GCN-DAG: ds_write2st64_b32 [[B2]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 ; GCN-DAG: ds_write2st64_b32 [[B3]], v{{[0-9]+}}, v{{[0-9]+}} offset1:32 @@ -356,8 +356,8 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_400: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 0x960, [[BASE]] ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:50 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:100 offset1:150 ; GCN-DAG: ds_write2_b64 [[BASE]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset0:200 offset1:250 @@ -385,12 +385,12 @@ bb: ; GCN-LABEL: ds_write64_combine_stride_8192_shifted: ; GCN: s_load_dword [[ARG:s[0-9]+]], s[4:5], 0x0 ; GCN: v_mov_b32_e32 [[BASE:v[0-9]+]], [[ARG]] -; GCN-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GCN-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] -; GFX9-DAG: v_add_i32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GCN-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B1:v[0-9]+]], vcc, 8, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B2:v[0-9]+]], vcc, 0x4008, [[BASE]] +; GFX9-DAG: v_add_{{[_co]*}}u32_e32 [[B3:v[0-9]+]], vcc, 0x8008, [[BASE]] ; GCN-DAG: ds_write2st64_b64 [[B1]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 ; GCN-DAG: ds_write2st64_b64 [[B2]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 ; GCN-DAG: ds_write2st64_b64 [[B3]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] offset1:16 diff --git a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll index 4748de570a9..f140a762942 100644 --- a/llvm/test/CodeGen/AMDGPU/fence-barrier.ll +++ b/llvm/test/CodeGen/AMDGPU/fence-barrier.ll @@ -53,7 +53,7 @@ define amdgpu_kernel void @test_local(i32 addrspace(1)*) { } ; GCN-LABEL: {{^}}test_global -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} +; GCN: v_add_u32_e32 v{{[0-9]+}}, vcc, 0x888, v{{[0-9]+}} ; GCN: flat_store_dword ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0){{$}} ; GCN-NEXT: s_barrier diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index 39b23cc53d4..604619a69c2 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -24,7 +24,7 @@ define void @void_func_i1_zeroext(i1 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i1_signext: ; GCN: s_waitcnt -; GCN-NEXT: v_add_i32_e32 v0, vcc, 12, v0 +; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 ; GCN-NOT: v0 ; GCN: buffer_store_dword v0, off define void @void_func_i1_signext(i1 signext %arg0) #0 { @@ -60,7 +60,7 @@ define void @void_func_i8(i8 %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i8_zeroext: ; GCN-NOT: and_b32 -; GCN: v_add_i32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { %ext = zext i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -70,7 +70,7 @@ define void @void_func_i8_zeroext(i8 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i8_signext: ; GCN-NOT: v_bfe_i32 -; GCN: v_add_i32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 define void @void_func_i8_signext(i8 signext %arg0) #0 { %ext = sext i8 %arg0 to i32 %add = add i32 %ext, 12 @@ -87,7 +87,7 @@ define void @void_func_i16(i16 %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i16_zeroext: ; GCN-NOT: v0 -; GCN: v_add_i32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { %ext = zext i16 %arg0 to i32 %add = add i32 %ext, 12 @@ -97,7 +97,7 @@ define void @void_func_i16_zeroext(i16 zeroext %arg0) #0 { ; GCN-LABEL: {{^}}void_func_i16_signext: ; GCN-NOT: v0 -; GCN: v_add_i32_e32 v0, vcc, 12, v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, 12, v0 define void @void_func_i16_signext(i16 signext %arg0) #0 { %ext = sext i16 %arg0 to i32 %add = add i32 %ext, 12 diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index 30c9f2c3c56..28406e16219 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -396,100 +396,100 @@ define void @void_func_sret_struct_i8_i32({ i8, i32 }* sret %arg0) #0 { ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} ; GCN: buffer_load_dword v34 @@ -510,100 +510,100 @@ define <33 x i32> @v33i32_func_void() #0 { ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_4:v[0-9]+]], vcc, 4, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_4]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_8:v[0-9]+]], vcc, 8, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_8]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_12:v[0-9]+]], vcc, 12, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_12]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_16:v[0-9]+]], vcc, 16, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_16]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_20:v[0-9]+]], vcc, 20, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_20]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_24:v[0-9]+]], vcc, 24, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_24]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_28:v[0-9]+]], vcc, 28, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_28]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_32:v[0-9]+]], vcc, 32, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_32]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_36:v[0-9]+]], vcc, 36, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_36]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_40:v[0-9]+]], vcc, 40, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_40]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_44:v[0-9]+]], vcc, 44, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_44]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_48:v[0-9]+]], vcc, 48, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_48]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_52:v[0-9]+]], vcc, 52, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_52]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_56:v[0-9]+]], vcc, 56, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_56]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_60:v[0-9]+]], vcc, 60, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_60]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_64:v[0-9]+]], vcc, 64, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_64]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_68:v[0-9]+]], vcc, 0x44, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_68]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_72:v[0-9]+]], vcc, 0x48, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_72]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_76:v[0-9]+]], vcc, 0x4c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_76]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_80:v[0-9]+]], vcc, 0x50, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_80]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_84:v[0-9]+]], vcc, 0x54, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_84]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_88:v[0-9]+]], vcc, 0x58, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_88]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_92:v[0-9]+]], vcc, 0x5c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_92]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_96:v[0-9]+]], vcc, 0x60, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_96]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_100:v[0-9]+]], vcc, 0x64, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_100]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_104:v[0-9]+]], vcc, 0x68, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_104]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_108:v[0-9]+]], vcc, 0x6c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_108]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_112:v[0-9]+]], vcc, 0x70, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_112]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_116:v[0-9]+]], vcc, 0x74, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_116]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_120:v[0-9]+]], vcc, 0x78, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_120]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_124:v[0-9]+]], vcc, 0x7c, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_124]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} ; GCN: buffer_load_dword v34 @@ -623,11 +623,11 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_128:v[0-9]+]], vcc, 0x80, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_128]], s[0:3], s4 offen{{$}} -; GCN-DAG: v_add_i32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 [[ADD_256:v[0-9]+]], vcc, 0xfc, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[ADD_256]], s[0:3], s4 offen{{$}} ; GCN: buffer_load_dword v33 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll index d6f8ac70da1..4f8c6191224 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.load.ll @@ -103,7 +103,7 @@ main_body: } ;CHECK-LABEL: {{^}}buffer_load_negative_offset: -;CHECK: v_add_i32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0 +;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0 ;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll index 617f1f19e36..a379f86e200 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.init.exec.ll @@ -51,7 +51,7 @@ main_body: ; GCN: s_bfm_b64 exec, s1, 0 ; GCN: s_cmp_eq_u32 s1, 64 ; GCN: s_cmov_b64 exec, -1 -; GCN: v_add_i32_e32 v0, vcc, s0, v0 +; GCN: v_add_co_u32_e32 v0, vcc, s0, v0 define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) { main_body: call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19) @@ -65,7 +65,7 @@ main_body: ; GCN: s_bfm_b64 exec, s1, 0 ; GCN: s_cmp_eq_u32 s1, 64 ; GCN: s_cmov_b64 exec, -1 -; GCN: v_add_i32_e32 v0, vcc, s0, v0 +; GCN: v_add_co_u32_e32 v0, vcc, s0, v0 define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) { main_body: %s = add i32 %a, %count diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll index 871b8c4f99b..b3e1c86d947 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sbfe.ll @@ -396,7 +396,7 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ; GCN: buffer_load_dword [[LOAD:v[0-9]+]] ; GCN: v_bfe_i32 [[BFE:v[0-9]+]], [[LOAD]], 1, 16 ; GCN: v_lshrrev_b32_e32 [[TMP0:v[0-9]+]], 31, [[BFE]] -; GCN: v_add_i32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]] +; GCN: v_add_{{[iu]}}32_e32 [[TMP1:v[0-9]+]], vcc, [[TMP0]], [[BFE]] ; GCN: v_ashrrev_i32_e32 [[TMP2:v[0-9]+]], 1, [[TMP1]] ; GCN: buffer_store_dword [[TMP2]] define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll index 68fd08f778c..62bd684d541 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll @@ -65,7 +65,7 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN-NEXT: v_and_b32_e32 ; FIXME: Should be using s_add_i32 ; GCN-NOT: {{[^@]}}bfe @@ -81,7 +81,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN-NEXT: v_and_b32_e32 ; GCN-NOT: {{[^@]}}bfe ; GCN: s_endpgm @@ -96,7 +96,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_1: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN: bfe ; GCN: s_endpgm define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { @@ -110,7 +110,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_3: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0xf8 ; GCN-NEXT: bfe ; GCN: s_endpgm @@ -125,7 +125,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i8_offset_7: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN-NEXT: v_and_b32_e32 {{v[0-9]+}}, 0x80 ; GCN-NEXT: bfe ; GCN: s_endpgm @@ -140,7 +140,7 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou ; GCN-LABEL: {{^}}bfe_u32_zext_in_reg_i16_offset_8: ; GCN: buffer_load_dword -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN-NEXT: bfe ; GCN: s_endpgm define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll index 00ee1b700bf..82875f8ddcd 100644 --- a/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll +++ b/llvm/test/CodeGen/AMDGPU/pack.v2f16.ll @@ -87,7 +87,7 @@ define amdgpu_kernel void @v_pack_v2f16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GFX9: v_and_b32_e32 [[ELT0:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[ELT0]] -; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] +; GFX9: v_add_{{[_coiu]*}}32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2f16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll index 22c72e7c552..d211999ada1 100644 --- a/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/pack.v2i16.ll @@ -81,7 +81,7 @@ define amdgpu_kernel void @v_pack_v2i16(i32 addrspace(1)* %in0, i32 addrspace(1) ; GFX9: v_and_b32_e32 [[MASKED:v[0-9]+]], 0xffff, [[VAL0]] ; GFX9: v_lshl_or_b32 [[PACKED:v[0-9]+]], [[VAL1]], 16, [[MASKED]] -; GFX9: v_add_i32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] +; GFX9: v_add_co_u32_e32 v{{[0-9]+}}, vcc, 9, [[PACKED]] define amdgpu_kernel void @v_pack_v2i16_user(i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/rotl.i64.ll b/llvm/test/CodeGen/AMDGPU/rotl.i64.ll index fa29d789ceb..1134f191c88 100644 --- a/llvm/test/CodeGen/AMDGPU/rotl.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/rotl.i64.ll @@ -20,7 +20,7 @@ entry: ; BOTH-LABEL: {{^}}v_rotl_i64: ; SI-DAG: v_lshl_b64 ; VI-DAG: v_lshlrev_b64 -; BOTH-DAG: v_sub_i32 +; BOTH-DAG: v_sub_{{[iu]}}32 ; SI: v_lshr_b64 ; VI: v_lshrrev_b64 ; BOTH: v_or_b32 diff --git a/llvm/test/CodeGen/AMDGPU/rotr.i64.ll b/llvm/test/CodeGen/AMDGPU/rotr.i64.ll index af58b404ca6..c55af2376ff 100644 --- a/llvm/test/CodeGen/AMDGPU/rotr.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/rotr.i64.ll @@ -17,7 +17,7 @@ entry: } ; BOTH-LABEL: {{^}}v_rotr_i64: -; BOTH-DAG: v_sub_i32 +; BOTH-DAG: v_sub_{{[iu]}}32 ; SI-DAG: v_lshr_b64 ; SI-DAG: v_lshl_b64 ; VI-DAG: v_lshrrev_b64 diff --git a/llvm/test/CodeGen/AMDGPU/saddo.ll b/llvm/test/CodeGen/AMDGPU/saddo.ll index 5f39d8191e9..c92ea657be0 100644 --- a/llvm/test/CodeGen/AMDGPU/saddo.ll +++ b/llvm/test/CodeGen/AMDGPU/saddo.ll @@ -49,7 +49,7 @@ define amdgpu_kernel void @s_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_saddo_i64: -; SI: v_add_i32 +; SI: v_add_{{[iu]}}32 ; SI: v_addc_u32 define amdgpu_kernel void @v_saddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll index 3f5a5c511c4..ade5a96929a 100644 --- a/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll +++ b/llvm/test/CodeGen/AMDGPU/scratch-buffer.ll @@ -50,7 +50,7 @@ done: ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} ; This constant isn't folded, because it has multiple uses. ; GCN-DAG: v_mov_b32_e32 [[K8000:v[0-9]+]], 0x8004 -; GCN-DAG: v_add_i32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]] +; GCN-DAG: v_add_{{[iu]}}32_e32 [[OFFSET:v[0-9]+]], vcc, [[K8000]] ; GCN: buffer_store_dword v{{[0-9]+}}, [[OFFSET]], s[{{[0-9]+}}:{{[0-9]+}}], s{{[0-9]+}} offen{{$}} define amdgpu_kernel void @legal_offset_fi_offset(i32 addrspace(1)* %out, i32 %cond, i32 addrspace(1)* %offsets, i32 %if_offset, i32 %else_offset) { @@ -87,7 +87,7 @@ done: } ; GCN-LABEL: {{^}}neg_vaddr_offset_inbounds: -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}} ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}} define amdgpu_kernel void @neg_vaddr_offset_inbounds(i32 %offset) { entry: @@ -99,7 +99,7 @@ entry: } ; GCN-LABEL: {{^}}neg_vaddr_offset: -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 16, v{{[0-9]+}} ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[{{[0-9]+:[0-9]+}}], s{{[0-9]+}} offen{{$}} define amdgpu_kernel void @neg_vaddr_offset(i32 %offset) { entry: diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll index 305107f690f..f75bec411d2 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll @@ -37,10 +37,10 @@ define amdgpu_kernel void @sdiv_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]], ; SI-DAG: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x98a1930b ; SI: v_mul_hi_i32 [[TMP:v[0-9]+]], [[VAL]], [[MAGIC]] -; SI: v_add_i32 +; SI: v_add_{{[iu]}}32 ; SI: v_lshrrev_b32 ; SI: v_ashrrev_i32 -; SI: v_add_i32 +; SI: v_add_{{[iu]}}32 ; SI: buffer_store_dword ; SI: s_endpgm define amdgpu_kernel void @slow_sdiv_i32_3435(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll index 0d181c2c34b..d7d2c43e6cf 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -4,10 +4,10 @@ ; GCN-LABEL: {{^}}add_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} -; NOSDWA: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] -; NOSDWA-NOT: v_add_i32_sdwa +; NOSDWA: v_add_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] +; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa -; SDWA: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA: v_add_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 @@ -19,10 +19,10 @@ define amdgpu_kernel void @add_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* ; GCN-LABEL: {{^}}sub_shr_i32: ; NOSDWA: v_lshrrev_b32_e32 v[[DST:[0-9]+]], 16, v{{[0-9]+}} -; NOSDWA: v_subrev_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] -; NOSDWA-NOT: v_subrev_i32_sdwa +; NOSDWA: v_subrev_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v[[DST]] +; NOSDWA-NOT: v_subrev_{{[_cou]*}}32_sdwa -; SDWA: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; SDWA: v_subrev_{{[_cou]*}}32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 define amdgpu_kernel void @sub_shr_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %a = load i32, i32 addrspace(1)* %in, align 4 @@ -426,9 +426,9 @@ entry: } ; GCN-LABEL: {{^}}add_bb_v2i16: -; NOSDWA-NOT: v_add_i32_sdwa +; NOSDWA-NOT: v_add_{{[_cou]*}}32_sdwa -; VI: v_add_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI: v_add_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX9: v_pk_add_u16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll index 8250bad7b0a..ed11c2c3e22 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy-duplicate-operand.ll @@ -5,7 +5,7 @@ ; used in an REG_SEQUENCE that also needs to be handled. ; SI-LABEL: {{^}}test_dup_operands: -; SI: v_add_i32_e32 +; SI: v_add_{{[iu]}}32_e32 define amdgpu_kernel void @test_dup_operands(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %in) { %a = load <2 x i32>, <2 x i32> addrspace(1)* %in %lo = extractelement <2 x i32> %a, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll index 3b24cf82d78..9a431bc387e 100644 --- a/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll +++ b/llvm/test/CodeGen/AMDGPU/sgpr-copy.ll @@ -321,7 +321,7 @@ ENDIF69: ; preds = %LOOP68 ; CHECK: s_cmp_eq_u32 ; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]] -; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}} +; CHECK: v_add_{{[iu]}}32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}} ; [[END]]: ; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}} diff --git a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll index 1cdfec9fdb5..2fe064e5dc9 100644 --- a/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll +++ b/llvm/test/CodeGen/AMDGPU/shl-add-to-add-shl.ll @@ -5,9 +5,9 @@ ; CHECK-LABEL: {{^}}add_const_offset: ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0 -; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]] +; CHECK: v_add_u32_e32 v[[ADD:[0-9]+]], vcc, 0xc80, v[[SHL]] ; CHECK-NOT: v_lshl -; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]] +; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[ADD]] ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: define amdgpu_kernel void @add_const_offset(i32 addrspace(1)* nocapture %arg) { bb: @@ -24,7 +24,7 @@ bb: ; CHECK: v_lshlrev_b32_e32 v[[SHL:[0-9]+]], 4, v0 ; CHECK: v_or_b32_e32 v[[OR:[0-9]+]], 0x1000, v[[SHL]] ; CHECK-NOT: v_lshl -; CHECK: v_add_i32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]] +; CHECK: v_add_u32_e32 v[[ADDRLO:[0-9]+]], vcc, s{{[0-9]+}}, v[[OR]] ; CHECK: load_dword v{{[0-9]+}}, v{{\[}}[[ADDRLO]]: define amdgpu_kernel void @or_const_offset(i32 addrspace(1)* nocapture %arg) { bb: diff --git a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll index cee57083fe5..3e77d876da3 100644 --- a/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll +++ b/llvm/test/CodeGen/AMDGPU/shl_add_ptr.ll @@ -35,7 +35,7 @@ define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 add ; GCN-LABEL: {{^}}load_shl_base_lds_1: ; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}} ; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8 -; GCN: v_add_i32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}} ; GCN-DAG: buffer_store_dword [[RESULT]] ; GCN-DAG: buffer_store_dword [[ADDUSE]] ; GCN: s_endpgm @@ -301,7 +301,7 @@ define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 { ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0 ; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528 -; GCN-DAG: v_add_i32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]] +; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]] ; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}} define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 { %idx.add = add nuw i32 %idx, 8191 @@ -315,7 +315,7 @@ define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 { } ; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_lds_offset: -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0 +; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]] ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]] ; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}} @@ -353,7 +353,7 @@ define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 { ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0 ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen offset:4088 -; GCN-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]] +; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]] ; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], s4 offen{{$}} define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 { %idx = zext i16 %idx.arg to i32 @@ -367,7 +367,7 @@ define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) # ret void } ; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset: -; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0 +; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0 ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]] ; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]] ; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], s4 offen{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll index 14ca635c6da..a185bc35ee5 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll +++ b/llvm/test/CodeGen/AMDGPU/shrink-add-sub-constant.ll @@ -6,7 +6,7 @@ ; GCN-LABEL: {{^}}v_test_i32_x_sub_64: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]] +; GCN: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]] define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -21,8 +21,8 @@ define amdgpu_kernel void @v_test_i32_x_sub_64(i32 addrspace(1)* %out, i32 addrs ; GCN-LABEL: {{^}}v_test_i32_x_sub_64_multi_use: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[Y:v[0-9]+]] -; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]] -; GCN-DAG: v_subrev_i32_e32 v{{[0-9]+}}, vcc, 64, [[Y]] +; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]] +; GCN-DAG: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[Y]] define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -39,7 +39,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_64_multi_use(i32 addrspace(1)* %out, ; GCN-LABEL: {{^}}v_test_i32_64_sub_x: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 64, [[X]] +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 64, [[X]] define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -53,7 +53,7 @@ define amdgpu_kernel void @v_test_i32_64_sub_x(i32 addrspace(1)* %out, i32 addrs ; GCN-LABEL: {{^}}v_test_i32_x_sub_65: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]] +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffbf, [[X]] define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -67,7 +67,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_65(i32 addrspace(1)* %out, i32 addrs ; GCN-LABEL: {{^}}v_test_i32_65_sub_x: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]] +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0x41, [[X]] define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -81,7 +81,7 @@ define amdgpu_kernel void @v_test_i32_65_sub_x(i32 addrspace(1)* %out, i32 addrs ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg16: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 16, [[X]] +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 16, [[X]] define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -95,7 +95,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg16(i32 addrspace(1)* %out, i32 ad ; GCN-LABEL: {{^}}v_test_i32_neg16_sub_x: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, -16, [[X]] +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, -16, [[X]] define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -109,7 +109,7 @@ define amdgpu_kernel void @v_test_i32_neg16_sub_x(i32 addrspace(1)* %out, i32 ad ; GCN-LABEL: {{^}}v_test_i32_x_sub_neg17: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 17, [[X]] +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 17, [[X]] define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 @@ -123,7 +123,7 @@ define amdgpu_kernel void @v_test_i32_x_sub_neg17(i32 addrspace(1)* %out, i32 ad ; GCN-LABEL: {{^}}v_test_i32_neg17_sub_x: ; GCN: {{buffer|flat}}_load_dword [[X:v[0-9]+]] -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]] +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, 0xffffffef, [[X]] define amdgpu_kernel void @v_test_i32_neg17_sub_x(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 diff --git a/llvm/test/CodeGen/AMDGPU/sibling-call.ll b/llvm/test/CodeGen/AMDGPU/sibling-call.ll index 833de07095b..1fca5499883 100644 --- a/llvm/test/CodeGen/AMDGPU/sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/sibling-call.ll @@ -4,7 +4,7 @@ ; GCN-LABEL: {{^}}i32_fastcc_i32_i32: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { %add0 = add i32 %arg0, %arg1 @@ -13,7 +13,7 @@ define fastcc i32 @i32_fastcc_i32_i32(i32 %arg0, i32 %arg1) #1 { ; GCN-LABEL: {{^}}i32_fastcc_i32_i32_stack_object: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN: v_add_i32_e32 v0, vcc, v1, v +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v ; GCN: s_mov_b32 s5, s32 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:24 ; GCN: s_waitcnt vmcnt(0) @@ -83,7 +83,7 @@ entry: ; GCN-NEXT: s_mov_b32 s5, s32 ; GCN-NEXT: buffer_load_dword v1, off, s[0:3], s5 offset:4 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_add_i32_e32 v0, vcc, v1, v0 +; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 ; GCN-NEXT: s_setpc_b64 s[30:31] define fastcc i32 @i32_fastcc_i32_byval_i32(i32 %arg0, i32* byval align 4 %arg1) #1 { %arg1.load = load i32, i32* %arg1, align 4 @@ -122,9 +122,9 @@ entry: ; GCN: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-DAG: buffer_load_dword [[LOAD_0:v[0-9]+]], off, s[0:3], s5 offset:4 ; GCN-DAG: buffer_load_dword [[LOAD_1:v[0-9]+]], off, s[0:3], s5 offset:8 -; GCN-DAG: v_add_i32_e32 v0, vcc, v1, v0 -; GCN: v_add_i32_e32 v0, vcc, [[LOAD_0]], v0 -; GCN: v_add_i32_e32 v0, vcc, [[LOAD_1]], v0 +; GCN-DAG: v_add_{{[_coiu]*}}32_e32 v0, vcc, v1, v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_0]], v0 +; GCN: v_add_{{[_coiu]*}}32_e32 v0, vcc, [[LOAD_1]], v0 ; GCN-NEXT: s_setpc_b64 define fastcc i32 @i32_fastcc_i32_i32_a32i32(i32 %arg0, i32 %arg1, [32 x i32] %large) #1 { %val_firststack = extractvalue [32 x i32] %large, 30 diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.ll b/llvm/test/CodeGen/AMDGPU/sminmax.ll index 41430715f34..10b85d3a94f 100644 --- a/llvm/test/CodeGen/AMDGPU/sminmax.ll +++ b/llvm/test/CodeGen/AMDGPU/sminmax.ll @@ -17,9 +17,9 @@ define amdgpu_kernel void @s_abs_i32(i32 addrspace(1)* %out, i32 %val) nounwind } ; FUNC-LABEL: {{^}}v_abs_i32: -; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] +; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] ; GCN: v_max_i32_e32 {{v[0-9]+}}, [[SRC]], [[NEG]] -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; EG: MAX_INT define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { @@ -33,7 +33,7 @@ define amdgpu_kernel void @v_abs_i32(i32 addrspace(1)* %out, i32 addrspace(1)* % } ; GCN-LABEL: {{^}}v_abs_i32_repeat_user: -; GCN: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] +; GCN: v_sub_{{[iu]}}32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SRC:v[0-9]+]] ; GCN: v_max_i32_e32 [[MAX:v[0-9]+]], [[SRC]], [[NEG]] ; GCN: v_mul_lo_i32 v{{[0-9]+}}, [[MAX]], [[MAX]] define amdgpu_kernel void @v_abs_i32_repeat_user(i32 addrspace(1)* %out, i32 addrspace(1)* %src) nounwind { @@ -68,14 +68,14 @@ define amdgpu_kernel void @s_abs_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> % } ; FUNC-LABEL: {{^}}v_abs_v2i32: -; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]] -; GCN: v_add_i32 -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 ; EG: MAX_INT ; EG: MAX_INT @@ -127,20 +127,20 @@ define amdgpu_kernel void @s_abs_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> % } ; FUNC-LABEL: {{^}}v_abs_v4i32: -; GCN-DAG: v_sub_i32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] -; GCN-DAG: v_sub_i32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG0:v[0-9]+]], vcc, 0, [[SRC0:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG1:v[0-9]+]], vcc, 0, [[SRC1:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG2:v[0-9]+]], vcc, 0, [[SRC2:v[0-9]+]] +; GCN-DAG: v_sub_{{[iu]}}32_e32 [[NEG3:v[0-9]+]], vcc, 0, [[SRC3:v[0-9]+]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC0]], [[NEG0]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC1]], [[NEG1]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC2]], [[NEG2]] ; GCN-DAG: v_max_i32_e32 {{v[0-9]+}}, [[SRC3]], [[NEG3]] -; GCN: v_add_i32 -; GCN: v_add_i32 -; GCN: v_add_i32 -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 +; GCN: v_add_{{[iu]}}32 ; EG: MAX_INT ; EG: MAX_INT diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll index eaba2056dab..afa273bb7b4 100644 --- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -8,12 +8,12 @@ ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 -; VI: v_sub_i32_e32 -; VI-DAG: v_sub_i32_e32 +; VI: v_sub_u32_e32 +; VI-DAG: v_sub_u32_e32 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 ; VI: v_max_i32_sdwa v{{[0-9]+}}, sext(v{{[0-9]+}}), sext(v{{[0-9]+}}) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:WORD_0 -; VI: v_add_i32_e32 -; VI: v_add_i32_e32 +; VI: v_add_u32_e32 +; VI: v_add_u32_e32 ; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; CI: v_sub_i32_e32 diff --git a/llvm/test/CodeGen/AMDGPU/smrd.ll b/llvm/test/CodeGen/AMDGPU/smrd.ll index ff33bd82305..5220c26803c 100644 --- a/llvm/test/CodeGen/AMDGPU/smrd.ll +++ b/llvm/test/CodeGen/AMDGPU/smrd.ll @@ -204,7 +204,7 @@ main_body: ; GCN-LABEL: {{^}}smrd_vgpr_offset_imm_too_large: ; GCN-NEXT: BB# -; GCN-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0 +; GCN-NEXT: v_add_{{[_coiu]*}}32_e32 v0, vcc, 0x1000, v0 ; GCN-NEXT: buffer_load_dword v{{[0-9]}}, v0, s[0:3], 0 offen ; define amdgpu_ps float @smrd_vgpr_offset_imm_too_large(<4 x i32> inreg %desc, i32 %offset) #0 { main_body: diff --git a/llvm/test/CodeGen/AMDGPU/srem.ll b/llvm/test/CodeGen/AMDGPU/srem.ll index e1f110a81e9..e2b1ea40d2d 100644 --- a/llvm/test/CodeGen/AMDGPU/srem.ll +++ b/llvm/test/CodeGen/AMDGPU/srem.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @srem_i32_4(i32 addrspace(1)* %out, i32 addrspace(1)* ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x92492493 ; SI: v_mul_hi_i32 {{v[0-9]+}}, {{v[0-9]+}}, [[MAGIC]] ; SI: v_mul_lo_i32 -; SI: v_sub_i32 +; SI: v_sub_{{[iu]}}32 ; SI: s_endpgm define amdgpu_kernel void @srem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { %num = load i32, i32 addrspace(1) * %in diff --git a/llvm/test/CodeGen/AMDGPU/ssubo.ll b/llvm/test/CodeGen/AMDGPU/ssubo.ll index a45369517c1..d4b22d60550 100644 --- a/llvm/test/CodeGen/AMDGPU/ssubo.ll +++ b/llvm/test/CodeGen/AMDGPU/ssubo.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @s_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_ssubo_i64: -; SI: v_sub_i32_e32 +; SI: v_sub_{{[iu]}}32_e32 ; SI: v_subb_u32_e32 define amdgpu_kernel void @v_ssubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* %carryout, i64 addrspace(1)* %aptr, i64 addrspace(1)* %bptr) nounwind { %a = load i64, i64 addrspace(1)* %aptr, align 4 diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll index 10bda6c73d6..85cc00ad93d 100644 --- a/llvm/test/CodeGen/AMDGPU/store-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll @@ -98,7 +98,7 @@ entry: ; GCN: s_waitcnt ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094 -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 @@ -119,7 +119,7 @@ entry: ; GCN: s_waitcnt ; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}} -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 @@ -139,7 +139,7 @@ entry: ; GCN: s_waitcnt ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095 -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 ; VI: flat_store_byte v[0:1], v{{[0-9]$}} @@ -160,7 +160,7 @@ entry: ; GCN: s_waitcnt ; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095 -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 @@ -272,7 +272,7 @@ entry: ; GCN: s_waitcnt ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}} -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 ; VI: flat_store_short v[0:1], v2{{$}} @@ -289,8 +289,9 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset: ; GCN: s_waitcnt -; GCN: v_add_i32_e32 -; GCN: v_addc_u32_e32 +; GCN: v_add_{{[_cou]*}}32_e32 +; VI: v_addc_u32_e32 +; GFX9: v_addc_co_u32_e32 ; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}} ; VI: flat_store_short v[0:1], v2{{$}} @@ -310,7 +311,7 @@ entry: ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}} ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 -; VI-DAG: v_add_i32_e32 +; VI-DAG: v_add_u32_e32 ; VI-DAG: v_addc_u32_e32 ; VI: flat_store_byte v[0:1], v2{{$}} ; GCN-NEXT: s_waitcnt @@ -327,8 +328,9 @@ entry: ; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset: ; GCN: s_waitcnt -; GCN-DAG: v_add_i32_e32 -; GCN-DAG: v_addc_u32_e32 +; GCN-DAG: v_add_{{[_cou]*}}32_e32 +; VI-DAG: v_addc_u32_e32 +; GFX9-DAG: v_addc_co_u32_e32 ; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}} ; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2 diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll index 620f38e5fba..b3f8b10c2f6 100644 --- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll @@ -49,8 +49,8 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, < ; GCN-LABEL: {{^}}s_test_sub_v2i16_kernarg: ; GFX9: v_pk_sub_i16 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}} -; VI: v_subrev_i32_e32 -; VI: v_subrev_i32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI: v_subrev_u32_e32 +; VI: v_subrev_u32_sdwa v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 { %add = sub <2 x i16> %a, %b store <2 x i16> %add, <2 x i16> addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index 5754bd9bb91..26b47dc75a8 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @s_uaddo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 ; FIXME: Could do scalar ; FUNC-LABEL: {{^}}s_uaddo_i32: -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -37,7 +37,7 @@ define amdgpu_kernel void @s_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i32: -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -58,7 +58,7 @@ define amdgpu_kernel void @v_uaddo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i32_novcc: -; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_add_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG: ADDC_UINT @@ -95,7 +95,7 @@ define amdgpu_kernel void @s_uaddo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_uaddo_i64: -; GCN: v_add_i32 +; GCN: v_add_{{[iu]}}32 ; GCN: v_addc_u32 ; EG: ADDC_UINT diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll index b89f3268976..08b764824ca 100644 --- a/llvm/test/CodeGen/AMDGPU/udivrem.ll +++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll @@ -30,24 +30,24 @@ ; SI: v_rcp_iflag_f32_e32 [[RCP:v[0-9]+]] ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]] ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]] -; SI-DAG: v_sub_i32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]] +; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]] ; SI: v_cndmask_b32_e64 ; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]] -; SI-DAG: v_add_i32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]] -; SI-DAG: v_subrev_i32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]] +; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]] +; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]] ; SI: v_cndmask_b32_e64 ; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]] ; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]] -; SI-DAG: v_add_i32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]] -; SI-DAG: v_sub_i32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]] +; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]] +; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_subrev_i32_e32 [[Quotient_S_One:v[0-9]+]], -; SI-DAG: v_subrev_i32_e32 [[Remainder_S_Den:v[0-9]+]], +; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]], +; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]], ; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 [[Remainder_A_Den:v[0-9]+]], +; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]], ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI: s_endpgm @@ -114,47 +114,47 @@ define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI: s_endpgm @@ -264,80 +264,80 @@ define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i3 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_and_b32_e32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_rcp_iflag_f32_e32 ; SI-DAG: v_mul_hi_u32 ; SI-DAG: v_mul_lo_i32 -; SI-DAG: v_sub_i32_e32 +; SI-DAG: v_sub_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_mul_hi_u32 -; SI-DAG: v_add_i32_e32 -; SI-DAG: v_subrev_i32_e32 +; SI-DAG: v_add_{{[iu]}}32_e32 +; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { diff --git a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll index 8d4ac3bd6f1..1dd67ba704e 100644 --- a/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/uint_to_fp.i64.ll @@ -21,7 +21,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 ; GCN-DAG: v_cmp_eq_u64 ; GCN-DAG: v_cmp_gt_u64 -; GCN: v_add_i32_e32 [[VR:v[0-9]+]] +; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]] ; GCN: v_cvt_f16_f32_e32 [[VR_F16:v[0-9]+]], [[VR]] ; GCN: {{buffer|flat}}_store_short {{.*}}[[VR_F16]] define amdgpu_kernel void @v_uint_to_fp_i64_to_f16(half addrspace(1)* %out, i64 addrspace(1)* %in) #0 { @@ -52,7 +52,7 @@ define amdgpu_kernel void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 ; GCN-DAG: v_cmp_eq_u64 ; GCN-DAG: v_cmp_gt_u64 -; GCN: v_add_i32_e32 [[VR:v[0-9]+]] +; GCN: v_add_{{[iu]}}32_e32 [[VR:v[0-9]+]] ; GCN: {{buffer|flat}}_store_dword {{.*}}[[VR]] define amdgpu_kernel void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 { %tid = call i32 @llvm.r600.read.tidig.x() diff --git a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll index 42ac2a0247a..247b9691aff 100644 --- a/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll +++ b/llvm/test/CodeGen/AMDGPU/uniform-cfg.ll @@ -560,7 +560,7 @@ done: } ; GCN-LABEL: {{^}}move_to_valu_vgpr_operand_phi: -; GCN: v_add_i32_e32 +; GCN: v_add_{{[iu]}}32_e32 ; GCN: ds_write_b32 define void @move_to_valu_vgpr_operand_phi(i32 addrspace(3)* %out) { bb0: diff --git a/llvm/test/CodeGen/AMDGPU/urem.ll b/llvm/test/CodeGen/AMDGPU/urem.ll index 823c918dcda..35d5735f87b 100644 --- a/llvm/test/CodeGen/AMDGPU/urem.ll +++ b/llvm/test/CodeGen/AMDGPU/urem.ll @@ -21,9 +21,9 @@ define amdgpu_kernel void @test_urem_i32(i32 addrspace(1)* %out, i32 addrspace(1 ; FUNC-LABEL: {{^}}test_urem_i32_7: ; SI: v_mov_b32_e32 [[MAGIC:v[0-9]+]], 0x24924925 ; SI: v_mul_hi_u32 [[MAGIC]], {{v[0-9]+}} -; SI: v_subrev_i32 +; SI: v_subrev_{{[iu]}}32 ; SI: v_mul_lo_i32 -; SI: v_sub_i32 +; SI: v_sub_{{[iu]}}32 ; SI: buffer_store_dword ; SI: s_endpgm define amdgpu_kernel void @test_urem_i32_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) { diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index f01bf498e0d..10c0d8640f5 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -22,7 +22,7 @@ define amdgpu_kernel void @s_usubo_i64_zext(i64 addrspace(1)* %out, i64 %a, i64 ; FIXME: Could do scalar ; FUNC-LABEL: {{^}}s_usubo_i32: -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -37,7 +37,7 @@ define amdgpu_kernel void @s_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i32: -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -58,7 +58,7 @@ define amdgpu_kernel void @v_usubo_i32(i32 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i32_novcc: -; GCN: v_sub_i32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} +; GCN: v_sub_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}} ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 0, 1, vcc ; EG-DAG: SUBB_UINT @@ -97,7 +97,7 @@ define amdgpu_kernel void @s_usubo_i64(i64 addrspace(1)* %out, i1 addrspace(1)* } ; FUNC-LABEL: {{^}}v_usubo_i64: -; GCN: v_sub_i32 +; GCN: v_sub_{{[iu]}}32 ; GCN: v_subb_u32 ; EG-DAG: SUBB_UINT diff --git a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll index d2708b068eb..1c2776ed4fc 100644 --- a/llvm/test/CodeGen/AMDGPU/vop-shrink.ll +++ b/llvm/test/CodeGen/AMDGPU/vop-shrink.ll @@ -4,7 +4,7 @@ ; Test that we correctly commute a sub instruction ; FUNC-LABEL: {{^}}sub_rev: ; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s -; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s +; SI: v_subrev_{{[iu]}}32_e32 v{{[0-9]+}}, vcc, s ; ModuleID = 'vop-shrink.ll' diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll index 915877fc44a..166901fe6c2 100644 --- a/llvm/test/CodeGen/AMDGPU/wqm.ll +++ b/llvm/test/CodeGen/AMDGPU/wqm.ll @@ -169,7 +169,7 @@ main_body: ;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 ;CHECK: buffer_load_dword ;CHECK: buffer_load_dword -;CHECK: v_add_i32_e32 +;CHECK: v_add_{{[iu]}}32_e32 define amdgpu_ps float @test_wwm2(i32 inreg %idx0, i32 inreg %idx1) { main_body: %src0 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx0, i32 0, i1 0, i1 0) @@ -303,7 +303,7 @@ endif: ;CHECK: v_mov_b32_e32 ;CHECK: s_not_b64 exec, exec ;CHECK: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1 -;CHECK: v_add_i32_e32 +;CHECK: v_add_{{[iu]}}32_e32 define amdgpu_ps void @test_set_inactive1(i32 inreg %idx) { main_body: %src = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0) |

