Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/br_cc.f16.ll              |   8
-rw-r--r--  llvm/test/CodeGen/AMDGPU/commute-compares.ll       |   7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fadd.f16.ll               |  12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fmul.f16.ll               |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/fsub.f16.ll               |  12
-rw-r--r--  llvm/test/CodeGen/AMDGPU/imm16.ll                  | 316
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll  |   2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll        |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll        |   6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/select.f16.ll             |   6
-rw-r--r--  llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir  | 709
11 files changed, 1059 insertions, 31 deletions
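
The hunks below mostly replace raw f16 bit patterns in the FileCheck lines (0x3800, 0x3c00, 0x4000, 0x4400, ...) with the decoded values (0.5, 1.0, 2.0, 4.0) that are now printed as inline constants. As a quick reference, a minimal Python sketch (illustration only, not part of the patch) that decodes those IEEE half-precision bit patterns:

# Decode the f16 bit patterns replaced in the CHECK lines below (illustration only).
import struct

for bits in (0x3800, 0x3C00, 0x4000, 0x4400, 0xB800, 0xBC00, 0xC000, 0xC400, 0x3118):
    (value,) = struct.unpack('<e', struct.pack('<H', bits))
    print(f"0x{bits:04X} -> {value}")
# 0x3800 -> 0.5, 0x3C00 -> 1.0, 0x4000 -> 2.0, 0x4400 -> 4.0,
# the 0xBxxx/0xCxxx patterns are the corresponding negated values, and
# 0x3118 -> ~0.1592 (close to 1/(2*pi)).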
diff --git a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
index b7584714919..340d30b898e 100644
--- a/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -41,7 +41,7 @@ two:
}
; GCN-LABEL: {{^}}br_cc_f16_imm_a
-; GCN: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
+; SI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; GCN: buffer_load_ushort v[[B_F16:[0-9]+]]
@@ -49,7 +49,7 @@ two:
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
; SI: s_cbranch_vccz
-; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; VI: v_cmp_nlt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: s_cbranch_vccnz
; VI: one{{$}}
@@ -80,13 +80,13 @@ two:
}
; GCN-LABEL: {{^}}br_cc_f16_imm_b
-; GCN: v_mov_b32_e32 v[[B_F16:[0-9]+]], {{0x37ff|0x3800}}{{$}}
+; SI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
-; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
+; VI: v_cmp_ngt_f16_e32 vcc, 0.5, v[[A_F16]]
; GCN: s_cbranch_vccnz
; GCN: one{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/commute-compares.ll b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
index 055fd8f1ccd..a4c51b233f4 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-compares.ll
@@ -693,11 +693,16 @@ define void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double addrspace(1)* %i
ret void
}
+
+; FIXME: Should be able to fold this frameindex
; Without commuting the frame index in the pre-regalloc run of
; SIShrinkInstructions, this was using the VOP3 compare.
; GCN-LABEL: {{^}}commute_frameindex:
-; GCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
+; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
+
+; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 0{{$}}
+; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}}
define void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
entry:
%stack0 = alloca i32
diff --git a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
index b2afc054ce1..fb2d418b443 100644
--- a/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fadd.f16.ll
@@ -29,7 +29,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[A_F32]], v[[B_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]]
+; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 1.0, v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fadd_f16_imm_a(
@@ -48,7 +48,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[A_F16]]
+; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 2.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fadd_f16_imm_b(
@@ -104,8 +104,8 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]]
-; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]]
+; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
+; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
@@ -132,8 +132,8 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_add_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0x4000, v[[A_V2_F16]]
-; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0x3c00, v[[A_F16_1]]
+; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 2.0, v[[A_V2_F16]]
+; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 1.0, v[[A_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
diff --git a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
index da0e01d6a7f..9ce4d7684fe 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul.f16.ll
@@ -48,7 +48,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_mul_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]]
+; VI: v_mul_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fmul_f16_imm_b(
@@ -105,7 +105,7 @@ entry:
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]]
+; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
@@ -132,7 +132,7 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_mul_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]]
+; VI: v_mul_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; VI: v_mul_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
index a5c84b84bd2..fb15edbaaff 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.f16.ll
@@ -29,7 +29,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
; SI: v_subrev_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_sub_f16_e32 v[[R_F16:[0-9]+]], 0x3c00, v[[B_F16]]
+; VI: v_sub_f16_e32 v[[R_F16:[0-9]+]], 1.0, v[[B_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fsub_f16_imm_a(
@@ -48,7 +48,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_add_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], 0xc000, v[[A_F16]]
+; VI: v_add_f16_e32 v[[R_F16:[0-9]+]], -2.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fsub_f16_imm_b(
@@ -104,8 +104,8 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 0x3c00, v[[B_V2_F16]]
-; VI: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 0x4000, v[[B_F16_1]]
+; VI: v_sub_f16_e32 v[[R_F16_0:[0-9]+]], 1.0, v[[B_V2_F16]]
+; VI: v_sub_f16_e32 v[[R_F16_1:[0-9]+]], 2.0, v[[B_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
@@ -132,8 +132,8 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_subrev_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], 0xc000, v[[A_V2_F16]]
-; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], 0xbc00, v[[A_F16_1]]
+; VI: v_add_f16_e32 v[[R_F16_0:[0-9]+]], -2.0, v[[A_V2_F16]]
+; VI: v_add_f16_e32 v[[R_F16_1:[0-9]+]], -1.0, v[[A_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
diff --git a/llvm/test/CodeGen/AMDGPU/imm16.ll b/llvm/test/CodeGen/AMDGPU/imm16.ll
new file mode 100644
index 00000000000..ed970287abb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/imm16.ll
@@ -0,0 +1,316 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+
+; FIXME: Merge into imm.ll
+
+; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_i16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_neg_0.0_i16(i16 addrspace(1)* %out) {
+ store volatile i16 -32768, i16 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_0.0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_0.0_f16(half addrspace(1)* %out) {
+ store half 0.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_imm_neg_0.0_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x8000{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffff8000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_imm_neg_0.0_f16(half addrspace(1)* %out) {
+ store half -0.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_0.5_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3800{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_0.5_f16(half addrspace(1)* %out) {
+ store half 0.5, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_0.5_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb800{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_m_0.5_f16(half addrspace(1)* %out) {
+ store half -0.5, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_1.0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_1.0_f16(half addrspace(1)* %out) {
+ store half 1.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_1.0_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffbc00{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_m_1.0_f16(half addrspace(1)* %out) {
+ store half -1.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_2.0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_2.0_f16(half addrspace(1)* %out) {
+ store half 2.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_2.0_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc000{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_m_2.0_f16(half addrspace(1)* %out) {
+ store half -2.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_4.0_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x4400{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_4.0_f16(half addrspace(1)* %out) {
+ store half 4.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_4.0_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffc400{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_m_4.0_f16(half addrspace(1)* %out) {
+ store half -4.0, half addrspace(1)* %out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3118{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_inv_2pi_f16(half addrspace(1)* %out) {
+ store half 0xH3118, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_f16:
+; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118{{$}}
+; VI: v_mov_b32_e32 [[REG:v[0-9]+]], 0xffffb118{{$}}
+; GCN: buffer_store_short [[REG]]
+define void @store_inline_imm_m_inv_2pi_f16(half addrspace(1)* %out) {
+ store half 0xHB118, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_literal_imm_f16:
+; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c00
+; GCN: buffer_store_short [[REG]]
+define void @store_literal_imm_f16(half addrspace(1)* %out) {
+ store half 4096.0, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_0.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_0.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_0.5_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_0.5_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0.5
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -0.5, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_0.5_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, -0.5
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_1.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 1.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_1.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 1.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -1.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_1.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, -1.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_2.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 2.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_2.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 2.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_2.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, -2.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_4.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 4.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_4.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 4.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -4.0, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_4.0_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, -4.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0.5, [[VAL]]
+; VI: buffer_store_short [[REG]]
+define void @commute_add_inline_imm_0.5_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+ %x = load half, half addrspace(1)* %in
+ %y = fadd half %x, 0.5
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_add_literal_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 0x6400, [[VAL]]
+; VI: buffer_store_short [[REG]]
+define void @commute_add_literal_f16(half addrspace(1)* %out, half addrspace(1)* %in) {
+ %x = load half, half addrspace(1)* %in
+ %y = fadd half %x, 1024.0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_1_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 1, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_1_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xH0001
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_2_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 2, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_2_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xH0002
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_16_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 16, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_16_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xH0010
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_1_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -1, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_1_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xHFFFF
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_2_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -2, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_2_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xHFFFE
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_neg_16_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], -16, [[VAL]]{{$}}
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_neg_16_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xHFFF0
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_63_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 63, [[VAL]]
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_63_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xH003F
+ store half %y, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}add_inline_imm_64_f16:
+; VI: buffer_load_ushort [[VAL:v[0-9]+]]
+; VI: v_add_f16_e32 [[REG:v[0-9]+]], 64, [[VAL]]
+; VI: buffer_store_short [[REG]]
+define void @add_inline_imm_64_f16(half addrspace(1)* %out, half %x) {
+ %y = fadd half %x, 0xH0040
+ store half %y, half addrspace(1)* %out
+ ret void
+}
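
In imm16.ll above, the SI checks expect the raw 16-bit pattern (e.g. 0x8000, 0xb800) while the corresponding VI checks expect the same pattern sign-extended to 32 bits (0xffff8000, 0xffffb800). A minimal sketch of that sign extension, for reference only (not part of the patch, and not a claim about why the two targets print the immediate differently):

# Sign-extend a 16-bit pattern to 32 bits, matching the VI CHECK lines above.
def sext16_to_32(bits: int) -> int:
    bits &= 0xFFFF
    return bits | 0xFFFF0000 if bits & 0x8000 else bits

assert hex(sext16_to_32(0x8000)) == '0xffff8000'
assert hex(sext16_to_32(0xB800)) == '0xffffb800'
assert hex(sext16_to_32(0x3C00)) == '0x3c00'  # positive values are unchanged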
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
index 8ab2efe651b..a4b8d7fa58d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ldexp.f16.ll
@@ -20,7 +20,7 @@ define void @ldexp_f16(
; GCN-LABEL: {{^}}ldexp_f16_imm_a
; GCN: buffer_load_dword v[[B_I32:[0-9]+]]
-; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], 0x4000, v[[B_I32]]
+; VI: v_ldexp_f16_e32 v[[R_F16:[0-9]+]], 2.0, v[[B_I32]]
; GCN: buffer_store_short v[[R_F16]]
define void @ldexp_f16_imm_a(
half addrspace(1)* %r,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
index 0accbad9988..0f75f7a5a49 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.maxnum.f16.ll
@@ -51,7 +51,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_max_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]]
+; VI: v_max_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @maxnum_f16_imm_b(
@@ -108,7 +108,7 @@ entry:
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]]
+; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
@@ -135,7 +135,7 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_max_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]]
+; VI: v_max_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; VI: v_max_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
index 9f41df6fd25..6bf2e9ba2e3 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.minnum.f16.ll
@@ -51,7 +51,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
; SI: v_min_f32_e32 v[[R_F32:[0-9]+]], v[[B_F32]], v[[A_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], 0x4400, v[[A_F16]]
+; VI: v_min_f16_e32 v[[R_F16:[0-9]+]], 4.0, v[[A_F16]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @minnum_f16_imm_b(
@@ -108,7 +108,7 @@ entry:
; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[A_F32_1]], v[[B_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4200, v[[B_V2_F16]]
-; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4400, v[[B_F16_1]]
+; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 4.0, v[[B_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
@@ -135,7 +135,7 @@ entry:
; SI: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[R_F32_0]]
; SI: v_min_f32_e32 v[[R_F32_1:[0-9]+]], v[[B_F32_1]], v[[A_F32_1]]
; SI: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[R_F32_1]]
-; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 0x4400, v[[A_V2_F16]]
+; VI: v_min_f16_e32 v[[R_F16_0:[0-9]+]], 4.0, v[[A_V2_F16]]
; VI: v_min_f16_e32 v[[R_F16_1:[0-9]+]], 0x4200, v[[A_F16_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
diff --git a/llvm/test/CodeGen/AMDGPU/select.f16.ll b/llvm/test/CodeGen/AMDGPU/select.f16.ll
index a0ad475c569..eadec5c47ad 100644
--- a/llvm/test/CodeGen/AMDGPU/select.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select.f16.ll
@@ -45,8 +45,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
-; VI: v_cmp_lt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
+; VI: v_cmp_lt_f16_e32 vcc, 0.5, v[[B_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
@@ -76,8 +75,7 @@ entry:
; SI: v_cvt_f32_f16_e32 v[[D_F32:[0-9]+]], v[[D_F16]]
; SI: v_cndmask_b32_e32 v[[R_F32:[0-9]+]], v[[D_F32]], v[[C_F32]]
; SI: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[R_F32]]
-; VI: v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
-; VI: v_cmp_gt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
+; VI: v_cmp_gt_f16_e32 vcc, 0.5, v[[A_F16]]
; VI: v_cndmask_b32_e32 v[[R_F16:[0-9]+]], v[[D_F16]], v[[C_F16]], vcc
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir b/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir
new file mode 100644
index 00000000000..3277d37d7e4
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/fold-imm-f16-f32.mir
@@ -0,0 +1,709 @@
+# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
+--- |
+ define void @add_f32_1.0_one_f16_use() #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f32.add = fadd float %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile float %f32.add, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f32_1.0_multi_f16_use() #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f32.add = fadd float %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile float %f32.add, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f32.add = fadd float %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile float %f32.add, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f16.add1 = fadd half %f16.val1, 0xH3C00
+ %f32.add = fadd float %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile half %f16.add1, half addrspace(1)* undef
+ store volatile float %f32.add, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_i32_1_multi_f16_use() #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH0001
+ %f16.add1 = fadd half %f16.val1, 0xH0001
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile half %f16.add1,half addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xHFFFE
+ %f16.add1 = fadd half %f16.val1, 0xHFFFE
+ %f32.add = fadd float %f32.val, 0xffffffffc0000000
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile half %f16.add1, half addrspace(1)* undef
+ store volatile float %f32.add, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f16_1.0_multi_f32_use() #0 {
+ %f32.val0 = load volatile float, float addrspace(1)* undef
+ %f32.val1 = load volatile float, float addrspace(1)* undef
+ %f32.val = load volatile float, float addrspace(1)* undef
+ %f32.add0 = fadd float %f32.val0, 1.0
+ %f32.add1 = fadd float %f32.val1, 1.0
+ store volatile float %f32.add0, float addrspace(1)* undef
+ store volatile float %f32.add1, float addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile half, half addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f32.add = fadd half %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile half %f32.add, half addrspace(1)* undef
+ ret void
+ }
+
+ define void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
+ %f16.val0 = load volatile half, half addrspace(1)* undef
+ %f16.val1 = load volatile half, half addrspace(1)* undef
+ %f32.val = load volatile half, half addrspace(1)* undef
+ %f16.add0 = fadd half %f16.val0, 0xH3C00
+ %f32.add = fadd half %f32.val, 1.000000e+00
+ store volatile half %f16.add0, half addrspace(1)* undef
+ store volatile half %f32.add, half addrspace(1)* undef
+ ret void
+ }
+
+ attributes #0 = { nounwind }
+
+...
+---
+
+# f32 1.0 with a single use should be folded as the low 32-bits of a
+# literal constant.
+
+# CHECK-LABEL: name: add_f32_1.0_one_f16_use
+# CHECK: %13 = V_ADD_F16_e32 1065353216, killed %11, implicit %exec
+
+name: add_f32_1.0_one_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = V_MOV_B32_e32 1065353216, implicit %exec
+ %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+# Materialized f32 inline immediate should not be folded into the f16
+# operands
+
+# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
+# CHECK: %13 = V_MOV_B32_e32 1065353216, implicit %exec
+# CHECK: %14 = V_ADD_F16_e32 %13, killed %11, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 killed %13, killed %12, implicit %exec
+
+
+name: add_f32_1.0_multi_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %13 = V_MOV_B32_e32 1065353216, implicit %exec
+ %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit %exec
+ %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# f32 1.0 should be folded into the single f32 use as an inline
+# immediate, and folded into the single f16 use as a literal constant
+
+# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
+# CHECK: %15 = V_ADD_F16_e32 1065353216, %11, implicit %exec
+# CHECK: %16 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec
+
+name: add_f32_1.0_one_f32_use_one_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %14 = V_MOV_B32_e32 1065353216, implicit %exec
+ %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec
+ %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# f32 1.0 should be folded for the single f32 use as an inline
+# constant, and not folded as a multi-use literal for the f16 cases
+
+# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
+# CHECK: %14 = V_MOV_B32_e32 1065353216, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %14, %11, implicit %exec
+# CHECK: %16 = V_ADD_F16_e32 %14, %12, implicit %exec
+# CHECK: %17 = V_ADD_F32_e32 1065353216, killed %13, implicit %exec
+
+name: add_f32_1.0_one_f32_use_multi_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %14 = V_MOV_B32_e32 1065353216, implicit %exec
+ %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec
+ %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit %exec
+ %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+# CHECK-LABEL: name: add_i32_1_multi_f16_use
+# CHECK: %13 = V_MOV_B32_e32 1, implicit %exec
+# CHECK: %14 = V_ADD_F16_e32 1, killed %11, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 1, killed %12, implicit %exec
+
+
+name: add_i32_1_multi_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %13 = V_MOV_B32_e32 1, implicit %exec
+ %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit %exec
+ %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
+# CHECK: %14 = V_MOV_B32_e32 -2, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 -2, %11, implicit %exec
+# CHECK: %16 = V_ADD_F16_e32 -2, %12, implicit %exec
+# CHECK: %17 = V_ADD_F32_e32 -2, killed %13, implicit %exec
+
+name: add_i32_m2_one_f32_use_multi_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+ - { id: 16, class: vgpr_32 }
+ - { id: 17, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %14 = V_MOV_B32_e32 -2, implicit %exec
+ %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit %exec
+ %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit %exec
+ %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# f32 1.0 should be folded for the single f32 use as an inline
+# constant, and not folded as a multi-use literal for the f16 cases
+
+# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
+# CHECK: %13 = V_MOV_B32_e32 15360, implicit %exec
+# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec
+# CHECK: %15 = V_ADD_F32_e32 %13, %12, implicit %exec
+
+name: add_f16_1.0_multi_f32_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %13 = V_MOV_B32_e32 15360, implicit %exec
+ %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit %exec
+ %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# The low 16-bits are an inline immediate, but the high bits are junk
+# FIXME: Should be able to fold this
+
+# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
+# CHECK: %13 = V_MOV_B32_e32 80886784, implicit %exec
+# CHECK: %14 = V_ADD_F16_e32 %13, %11, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec
+
+name: add_f16_1.0_other_high_bits_multi_f16_use
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = V_MOV_B32_e32 80886784, implicit %exec
+ %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit %exec
+ %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit %exec
+ BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ S_ENDPGM
+
+...
+---
+
+# FIXME: Should fold inline immediate into f16 and literal use into
+# f32 instruction.
+
+# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
+# CHECK: %13 = V_MOV_B32_e32 305413120, implicit %exec
+# CHECK: %14 = V_ADD_F32_e32 %13, %11, implicit %exec
+# CHECK: %15 = V_ADD_F16_e32 %13, %12, implicit %exec
+name: add_f16_1.0_other_high_bits_use_f16_f32
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_64 }
+ - { id: 1, class: sreg_32 }
+ - { id: 2, class: sgpr_32 }
+ - { id: 3, class: vgpr_32 }
+ - { id: 4, class: sreg_64 }
+ - { id: 5, class: sreg_32 }
+ - { id: 6, class: sreg_64 }
+ - { id: 7, class: sreg_32 }
+ - { id: 8, class: sreg_32 }
+ - { id: 9, class: sreg_32 }
+ - { id: 10, class: sreg_128 }
+ - { id: 11, class: vgpr_32 }
+ - { id: 12, class: vgpr_32 }
+ - { id: 13, class: vgpr_32 }
+ - { id: 14, class: vgpr_32 }
+ - { id: 15, class: vgpr_32 }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 0
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+body: |
+ bb.0 (%ir-block.0):
+ %4 = IMPLICIT_DEF
+ %5 = COPY %4.sub1
+ %6 = IMPLICIT_DEF
+ %7 = COPY %6.sub0
+ %8 = S_MOV_B32 61440
+ %9 = S_MOV_B32 -1
+ %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
+ %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`)
+ %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 2 from `half addrspace(1)* undef`)
+ %13 = V_MOV_B32_e32 305413120, implicit %exec
+ %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit %exec
+ %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `float addrspace(1)* undef`)
+ BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 2 into `half addrspace(1)* undef`)
+ S_ENDPGM
+
+...
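
For reference when reading the CHECK lines in fold-imm-f16-f32.mir: the decimal immediates are raw 32-bit patterns. 1065353216 is 0x3F800000 (f32 1.0), 15360 is 0x3C00 (f16 1.0), and 80886784 and 305413120 both carry 0x3C00 in the low 16 bits with junk in the high bits, matching the comments in the test. A quick decoding sketch (illustration only, not part of the patch):

# Print the hex form of the decimal immediates used in the MIR CHECK lines.
for imm in (1065353216, 15360, 80886784, 305413120):
    print(f"{imm} = 0x{imm:08X} (low 16 bits: 0x{imm & 0xFFFF:04X})")
# 1065353216 = 0x3F800000  (f32 1.0)
# 15360      = 0x00003C00  (f16 1.0)
# 80886784   = 0x04D23C00  (0x3C00 in the low 16 bits, junk above)
# 305413120  = 0x12343C00  (0x3C00 in the low 16 bits, junk above)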