Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/br_cc.f16.ll              |   8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/commute-compares.ll       |   7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fadd.f16.ll               |  12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fmul.f16.ll               |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fsub.f16.ll               |  12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/imm16.ll                  | 316
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll  |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll        |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll        |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/select.f16.ll             |   6
-rw-r--r--  llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir  | 709
11 files changed, 1059 insertions, 31 deletions
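
The updated VI checks in the diff below rely on the mapping between binary16 bit patterns and the values the AMDGPU backend can encode as inline constants (0, +/-0.5, +/-1.0, +/-2.0, +/-4.0 and 1/(2*pi)); 0x4200 (3.0) is not in that set and therefore stays a literal in the checks. As an illustrative aside, not part of the patch, here is a minimal standalone C99 sketch (link with -lm) that decodes the half-precision bit patterns appearing throughout these tests:

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Decode an IEEE 754 binary16 bit pattern into a double.
   Handles normals, subnormals, zeros, infinities and NaNs. */
static double half_to_double(uint16_t h) {
    int sign = (h >> 15) & 1;
    int exp  = (h >> 10) & 0x1f;
    int frac = h & 0x3ff;
    double mag;
    if (exp == 0)            /* zero or subnormal */
        mag = ldexp((double)frac, -24);
    else if (exp == 0x1f)    /* infinity or NaN */
        mag = frac ? NAN : INFINITY;
    else                     /* normal: implicit leading 1 */
        mag = ldexp((double)(frac | 0x400), exp - 25);
    return sign ? -mag : mag;
}

int main(void) {
    /* Bit patterns referenced by the FileCheck lines in this patch. */
    const uint16_t imms[] = { 0x3800, 0x3C00, 0x4000, 0x4400,
                              0xBC00, 0xC000, 0x3118, 0x4200 };
    for (size_t i = 0; i < sizeof imms / sizeof imms[0]; ++i)
        printf("0x%04X -> %g\n", imms[i], half_to_double(imms[i]));
    return 0;
}

Running it prints, for example, "0x3800 -> 0.5" and "0x3C00 -> 1", matching the operands the VI checks now expect, while 0x3118 decodes to roughly 0.159180, the half-precision rounding of 1/(2*pi).
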
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll index b7584714919..340d30b898e 100644 --- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll @@ -41,7 +41,7 @@ two: } ; GCN-LABEL: {{^}}br_cc_f16_imm_a -; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}} +; SI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}} ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; GCN: buffer_load_ushort v[[B_F16:[0-9]+]] @@ -49,7 +49,7 @@ two: ; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]] ; SI: s_cbranch_vccz -; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]] +; VI: v_cmp_nlt_f16_e32 vcc, 0.5, v[[B_F16]] ; VI: s_cbranch_vccnz ; VI: one{{$}} @@ -80,13 +80,13 @@ two: } ; GCN-LABEL: {{^}}br_cc_f16_imm_b -; GCN: v_mov_b32_e32 v[[B_F16:[0-9]+]], {{0x37ff|0x3800}}{{$}} +; SI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}} ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]] ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]] -; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]] +; VI: v_cmp_ngt_f16_e32 vcc, 0.5, v[[A_F16]] ; GCN: s_cbranch_vccnz ; GCN: one{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll index 055fd8f1ccd..a4c51b233f4 100644 --- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll +++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll @@ -693,11 +693,16 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i ret void } + +; FIXME: Should be able to fold this frameindex ; Without commuting the frame index in the pre-regalloc run of ; SIShrinkInstructions, this was using the VOP3 compare. ; GCN-LABEL: {{^}}commute_frameindex: -; GCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} +; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}} + +; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}} +; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}} define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 { entry: %stack0 = alloca i32 diff --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll index b2afc054ce1..fb2d418b443 100644 --- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll @@ -29,7 +29,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]] +; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 1.0, v[[B_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @fadd_f16_imm_a( @@ -48,7 +48,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[A_F16]] +; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 2.0, v[[A_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @fadd_f16_imm_b( @@ -104,8 +104,8 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]] -; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]] +; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]] +; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]] ; GCN: 
v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] @@ -132,8 +132,8 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x4000, v[[A_V2_F16]] -; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x3c00, v[[A_F16_1]] +; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 2.0, v[[A_V2_F16]] +; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 1.0, v[[A_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] diff --git a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll index da0e01d6a7f..9ce4d7684fe 100644 --- a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll @@ -48,7 +48,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]] +; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @fmul_f16_imm_b( @@ -105,7 +105,7 @@ entry: ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]] +; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] @@ -132,7 +132,7 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]] +; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll index a5c84b84bd2..fb15edbaaff 100644 --- a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll @@ -29,7 +29,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]] ; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_sub_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]] +; VI: v_sub_f16_e32 v[[R_F16:[0-9]+]], 1.0, v[[B_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @fsub_f16_imm_a( @@ -48,7 +48,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0xc000, v[[A_F16]] +; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], -2.0, v[[A_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @fsub_f16_imm_b( @@ -104,8 +104,8 @@ entry: ; SI: 
v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]] -; VI: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]] +; VI: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]] +; VI: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] @@ -132,8 +132,8 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0xc000, v[[A_V2_F16]] -; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0xbc00, v[[A_F16_1]] +; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]] +; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], -1.0, v[[A_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll new file mode 100644 index 00000000000..ed970287abb --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/imm16.ll @@ -0,0 +1,316 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s + +; FIXME: Merge into imm.ll + +; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) { + store volatile i16 -32768, i16 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_0.0_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_0.0_f16(half addrspace(1)* %out) { + store half 0.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_imm_neg_0.0_f16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_imm_neg_0.0_f16(half addrspace(1)* %out) { + store half -0.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_0.5_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_0.5_f16(half addrspace(1)* %out) { + store half 0.5, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) { + store half -0.5, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_1.0_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_1.0_f16(half addrspace(1)* %out) { + store half 1.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16: +; SI: 
v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) { + store half -1.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_2.0_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_2.0_f16(half addrspace(1)* %out) { + store half 2.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) { + store half -2.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_4.0_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_4.0_f16(half addrspace(1)* %out) { + store half 4.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) { + store half -4.0, half addrspace(1)* %out + ret void +} + + +; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) { + store half 0xH3118, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16: +; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}} +; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}} +; GCN: buffer_store_short [[REG]] +define void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) { + store half 0xHB118, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}store_literal_imm_f16: +; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00 +; GCN: buffer_store_short [[REG]] +define void @store_literal_imm_f16(half addrspace(1)* %out) { + store half 4096.0, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_0.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_0.5_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0.5 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, -0.5 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_1.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 1.0 + store half %y, half addrspace(1)* %out + ret 
void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, -1.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_2.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 2.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, -2.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_4.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 4.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, -4.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]] +; VI: buffer_store_short [[REG]] +define void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) { + %x = load half, half addrspace(1)* %in + %y = fadd half %x, 0.5 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_add_literal_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]] +; VI: buffer_store_short [[REG]] +define void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) { + %x = load half, half addrspace(1)* %in + %y = fadd half %x, 1024.0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_1_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 1, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xH0001 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_2_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 2, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xH0002 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_16_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 16, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xH0010 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; 
VI: v_add_f16_e32 [[REG:v[0-9]+]], -1, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xHFFFF + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xHFFFE + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], -16, [[VAL]]{{$}} +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xHFFF0 + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_63_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 63, [[VAL]] +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xH003F + store half %y, half addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}add_inline_imm_64_f16: +; VI: buffer_load_ushort [[VAL:v[0-9]+]] +; VI: v_add_f16_e32 [[REG:v[0-9]+]], 64, [[VAL]] +; VI: buffer_store_short [[REG]] +define void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) { + %y = fadd half %x, 0xH0040 + store half %y, half addrspace(1)* %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll index 8ab2efe651b..a4b8d7fa58d 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll @@ -20,7 +20,7 @@ define void @ldexp_f16( ; GCN-LABEL: {{^}}ldexp_f16_imm_a ; GCN: buffer_load_dword v[[B_I32:[0-9]+]] -; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[B_I32]] +; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], 2.0, v[[B_I32]] ; GCN: buffer_store_short v[[R_F16]] define void @ldexp_f16_imm_a( half addrspace(1)* %r, diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll index 0accbad9988..0f75f7a5a49 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll @@ -51,7 +51,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]] +; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @maxnum_f16_imm_b( @@ -108,7 +108,7 @@ entry: ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]] +; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] @@ -135,7 +135,7 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; 
VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]] +; VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll index 9f41df6fd25..6bf2e9ba2e3 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll @@ -51,7 +51,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]] ; SI: v_min_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]] +; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]] ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm define void @minnum_f16_imm_b( @@ -108,7 +108,7 @@ entry: ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] ; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]] -; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]] +; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] ; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]] @@ -135,7 +135,7 @@ entry: ; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]] ; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]] ; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]] -; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]] +; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]] ; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]] ; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]] ; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]] diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll index a0ad475c569..eadec5c47ad 100644 --- a/llvm/test/CodeGen/AMDGPU/select.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll @@ -45,8 +45,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]] ; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}} -; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]] +; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]] ; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm @@ -76,8 +75,7 @@ entry: ; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]] ; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]] ; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]] -; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}} -; VI: v_cmp_gt_f16_e32 vcc, v[[B_F16]], v[[A_F16]] +; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]] ; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc ; GCN: buffer_store_short v[[R_F16]] ; GCN: s_endpgm diff --git a/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir new file mode 100644 index 00000000000..3277d37d7e4 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir @@ -0,0 +1,709 @@ +# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s 
-o - | FileCheck %s +--- | + define void @add_f32_1.0_one_f16_use() #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f32.add = fadd float %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile float %f32.add, float addrspace(1)* undef + ret void + } + + define void @add_f32_1.0_multi_f16_use() #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f32.add = fadd float %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile float %f32.add, float addrspace(1)* undef + ret void + } + + define void @add_f32_1.0_one_f32_use_one_f16_use () #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f32.add = fadd float %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile float %f32.add, float addrspace(1)* undef + ret void + } + + define void @add_f32_1.0_one_f32_use_multi_f16_use () #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f16.add1 = fadd half %f16.val1, 0xH3C00 + %f32.add = fadd float %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile half %f16.add1, half addrspace(1)* undef + store volatile float %f32.add, float addrspace(1)* undef + ret void + } + + define void @add_i32_1_multi_f16_use() #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH0001 + %f16.add1 = fadd half %f16.val1, 0xH0001 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile half %f16.add1,half addrspace(1)* undef + ret void + } + + define void @add_i32_m2_one_f32_use_multi_f16_use () #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xHFFFE + %f16.add1 = fadd half %f16.val1, 0xHFFFE + %f32.add = fadd float %f32.val, 0xffffffffc0000000 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile half %f16.add1, half addrspace(1)* undef + store volatile float %f32.add, float addrspace(1)* undef + ret void + } + + define void @add_f16_1.0_multi_f32_use() #0 { + %f32.val0 = load volatile float, float addrspace(1)* undef + %f32.val1 = load volatile float, float addrspace(1)* undef + %f32.val = load volatile float, float addrspace(1)* undef + %f32.add0 = fadd float %f32.val0, 1.0 + %f32.add1 = fadd float %f32.val1, 1.0 + store volatile float %f32.add0, float addrspace(1)* undef + store volatile float %f32.add1, float addrspace(1)* undef + ret void + } + + define void @add_f16_1.0_other_high_bits_multi_f16_use() #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load 
volatile half, half addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f32.add = fadd half %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile half %f32.add, half addrspace(1)* undef + ret void + } + + define void @add_f16_1.0_other_high_bits_use_f16_f32() #0 { + %f16.val0 = load volatile half, half addrspace(1)* undef + %f16.val1 = load volatile half, half addrspace(1)* undef + %f32.val = load volatile half, half addrspace(1)* undef + %f16.add0 = fadd half %f16.val0, 0xH3C00 + %f32.add = fadd half %f32.val, 1.000000e+00 + store volatile half %f16.add0, half addrspace(1)* undef + store volatile half %f32.add, half addrspace(1)* undef + ret void + } + + attributes #0 = { nounwind } + +... +--- + +# f32 1.0 with a single use should be folded as the low 32-bits of a +# literal constant. + +# CHECK-LABEL: name: add_f32_1.0_one_f16_use +# CHECK: %13 = V_ADD_F16_e32 1065353216, killed %11, implicit %exec + +name: add_f32_1.0_one_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = V_MOV_B32_e32 1065353216, implicit %exec + %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + S_ENDPGM + +... 
+--- +# Materialized f32 inline immediate should not be folded into the f16 +# operands + +# CHECK-LABEL: name: add_f32_1.0_multi_f16_use +# CHECK: %13 = V_MOV_B32_e32 1065353216, implicit %exec +# CHECK: %14 = V_ADD_F16_e32 %13, killed %11, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 killed %13, killed %12, implicit %exec + + +name: add_f32_1.0_multi_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %13 = V_MOV_B32_e32 1065353216, implicit %exec + %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit %exec + %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + S_ENDPGM + +... 
+--- + +# f32 1.0 should be folded into the single f32 use as an inline +# immediate, and folded into the single f16 use as a literal constant + +# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use +# CHECK: %15 = V_ADD_F16_e32 1065353216, %11, implicit %exec +# CHECK: %16 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec + +name: add_f32_1.0_one_f32_use_one_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } + - { id: 16, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %14 = V_MOV_B32_e32 1065353216, implicit %exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec + %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + S_ENDPGM + +... 
+--- + +# f32 1.0 should be folded for the single f32 use as an inline +# constant, and not folded as a multi-use literal for the f16 cases + +# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use +# CHECK: %14 = V_MOV_B32_e32 1065353216, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 %14, %11, implicit %exec +# CHECK: %16 = V_ADD_F16_e32 %14, %12, implicit %exec +# CHECK: %17 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec + +name: add_f32_1.0_one_f32_use_multi_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %14 = V_MOV_B32_e32 1065353216, implicit %exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec + %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit %exec + %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + S_ENDPGM + +... 
+--- +# CHECK-LABEL: name: add_i32_1_multi_f16_use +# CHECK: %13 = V_MOV_B32_e32 1, implicit %exec +# CHECK: %14 = V_ADD_F16_e32 1, killed %11, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 1, killed %12, implicit %exec + + +name: add_i32_1_multi_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %13 = V_MOV_B32_e32 1, implicit %exec + %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit %exec + %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + S_ENDPGM + +... 
+--- + +# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use +# CHECK: %14 = V_MOV_B32_e32 -2, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 -2, %11, implicit %exec +# CHECK: %16 = V_ADD_F16_e32 -2, %12, implicit %exec +# CHECK: %17 = V_ADD_F32_e32 -2, killed %13, implicit %exec + +name: add_i32_m2_one_f32_use_multi_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } + - { id: 16, class: vgpr_32 } + - { id: 17, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %14 = V_MOV_B32_e32 -2, implicit %exec + %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec + %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit %exec + %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + S_ENDPGM + +... 
+--- + +# f32 1.0 should be folded for the single f32 use as an inline +# constant, and not folded as a multi-use literal for the f16 cases + +# CHECK-LABEL: name: add_f16_1.0_multi_f32_use +# CHECK: %13 = V_MOV_B32_e32 15360, implicit %exec +# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec +# CHECK: %15 = V_ADD_F32_e32 %13, %12, implicit %exec + +name: add_f16_1.0_multi_f32_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %13 = V_MOV_B32_e32 15360, implicit %exec + %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit %exec + %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + S_ENDPGM + +... 
+--- + +# The low 16-bits are an inline immediate, but the high bits are junk +# FIXME: Should be able to fold this + +# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use +# CHECK: %13 = V_MOV_B32_e32 80886784, implicit %exec +# CHECK: %14 = V_ADD_F16_e32 %13, %11, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec + +name: add_f16_1.0_other_high_bits_multi_f16_use +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = V_MOV_B32_e32 80886784, implicit %exec + %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit %exec + %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit %exec + BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + S_ENDPGM + +... +--- + +# FIXME: Should fold inline immediate into f16 and literal use into +# f32 instruction. 
+ +# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32 +# CHECK: %13 = V_MOV_B32_e32 305413120, implicit %exec +# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec +# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec +name: add_f16_1.0_other_high_bits_use_f16_f32 +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_32 } + - { id: 2, class: sgpr_32 } + - { id: 3, class: vgpr_32 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_32 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_32 } + - { id: 8, class: sreg_32 } + - { id: 9, class: sreg_32 } + - { id: 10, class: sreg_128 } + - { id: 11, class: vgpr_32 } + - { id: 12, class: vgpr_32 } + - { id: 13, class: vgpr_32 } + - { id: 14, class: vgpr_32 } + - { id: 15, class: vgpr_32 } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.0): + %4 = IMPLICIT_DEF + %5 = COPY %4.sub1 + %6 = IMPLICIT_DEF + %7 = COPY %6.sub0 + %8 = S_MOV_B32 61440 + %9 = S_MOV_B32 -1 + %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4 + %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`) + %13 = V_MOV_B32_e32 305413120, implicit %exec + %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit %exec + %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`) + BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`) + S_ENDPGM + +... |
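
For reference, and again as an illustrative aside rather than part of the patch, the 32-bit immediates materialized by V_MOV_B32 in the MIR test above decode as follows. The point the FIXME comments make is that 80886784 (0x04D23C00) and 305413120 (0x12343C00) carry the f16 inline constant 1.0 (0x3C00) in their low 16 bits but junk in the high bits, while 1065353216 (0x3F800000) is f32 1.0 and 15360 is plain 0x3C00:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Reinterpret a 32-bit immediate as an IEEE binary32 value. */
static float bits_to_float(uint32_t bits) {
    float f;
    memcpy(&f, &bits, sizeof f);
    return f;
}

int main(void) {
    /* Immediates that appear in the MIR test above. */
    const uint32_t imms[] = { 1065353216u, 15360u, 80886784u, 305413120u };
    for (size_t i = 0; i < sizeof imms / sizeof imms[0]; ++i) {
        uint32_t v = imms[i];
        printf("%u = 0x%08X (as f32: %g, low 16 bits: 0x%04X)\n",
               v, v, bits_to_float(v), v & 0xFFFFu);
    }
    return 0;
}

This is only a decoding aid; the folding behavior itself is what the CHECK lines in fold-imm-f16-f32.mir pin down.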