summary | refs | log | tree | commit | diff | stats
path: root/llvm/test
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/fneg.f16.ll 9
-rw-r--r-- llvm/test/CodeGen/AMDGPU/store-global.ll 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/store-hi16.ll 594
3 files changed, 601 insertions, 6 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
index fb6bd6c6637..a27a0b444ae 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg.f16.ll
@@ -1,6 +1,6 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=CIVI -check-prefix=GCN %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=CIVI -check-prefix=GCN -check-prefix=GFX89 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 -check-prefix=GCN -check-prefix=GFX89 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=kaveri -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,CIVI %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,CIVI,GFX89 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx901 -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
; FIXME: Should be able to do scalar op
; GCN-LABEL: {{^}}s_fneg_f16:
@@ -154,7 +154,8 @@ define amdgpu_kernel void @v_extract_fneg_fold_v2f16(<2 x half> addrspace(1)* %i
; GCN-LABEL: {{^}}v_extract_fneg_no_fold_v2f16:
; GCN: {{flat|global}}_load_dword [[VAL:v[0-9]+]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80008000, [[VAL]]
-; GCN: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
+; CIVI: v_lshrrev_b32_e32 [[ELT1:v[0-9]+]], 16, [[NEG]]
+; GFX9: global_store_short_d16_hi v{{\[[0-9]+:[0-9]+\]}}, [[NEG]], off
define amdgpu_kernel void @v_extract_fneg_no_fold_v2f16(<2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%fneg = fsub <2 x half> <half -0.0, half -0.0>, %val
diff --git a/llvm/test/CodeGen/AMDGPU/store-global.ll b/llvm/test/CodeGen/AMDGPU/store-global.ll
index 35714fe8ee4..3c414974ff8 100644
--- a/llvm/test/CodeGen/AMDGPU/store-global.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-global.ll
@@ -85,11 +85,11 @@ entry:
}
; FUNC-LABEL: {{^}}store_i24:
-; GCN: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
+; SIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; SIVI-DAG: buffer_store_byte
; SIVI-DAG: buffer_store_short
-; GFX9-DAG: global_store_byte
+; GFX9-DAG: global_store_byte_d16_hi v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:2
; GFX9-DAG: global_store_short
; EG: MEM_RAT MSKOR
diff --git a/llvm/test/CodeGen/AMDGPU/store-hi16.ll b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
new file mode 100644
index 00000000000..99af332949a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/store-hi16.ll
@@ -0,0 +1,594 @@
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x i16> is stored as i16 through an addrspace(1) pointer.
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16(i16 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ store i16 %elt1, i16 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2f16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x half> is stored as half through an addrspace(1) pointer.
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2f16(half addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x half>
+ %elt1 = extractelement <2 x half> %vec, i32 1
+ store half %elt1, half addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_i32_shift:
+; GCN: s_waitcnt
+; The upper 16 bits are produced with lshr 16 + trunc to i16, then stored via addrspace(1).
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_i32_shift(i16 addrspace(1)* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i16
+ store i16 %top, i16 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16_i8:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is truncated to i8 and stored via addrspace(1).
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_byte v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16_i8(i8 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ store i8 %byte, i8 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_i8_shift:
+; GCN: s_waitcnt
+; Bits 16..23 are produced with lshr 16 + trunc to i8, then stored via addrspace(1).
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_byte v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_i8_shift(i8 addrspace(1)* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i8
+ store i8 %top, i8 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:4094
+; The gep advances 2047 i16 elements (4094 bytes), the immediate offset checked for gfx9 above.
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+
+; VI: flat_store_short v[0:1], v2{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16_max_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %addr = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 2047
+ store i16 %elt1, i16 addrspace(1)* %addr
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16_min_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: global_store_short_d16_hi v[0:1], v2, off offset:-4096{{$}}
+; The gep steps back 2048 i16 elements (-4096 bytes), the immediate offset checked for gfx9 above.
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; The stored data register is left unpinned on VI; any VGPR number is accepted, not just one digit.
+; VI: flat_store_short v[0:1], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16_min_offset(i16 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ %value = bitcast i32 %arg to <2 x i16>
+ %hi = extractelement <2 x i16> %value, i32 1
+ %gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 -2048
+ store i16 %hi, i16 addrspace(1)* %gep
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:4095
+; The gep advances 4095 bytes, the immediate offset checked for gfx9 above.
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; VI: flat_store_byte v[0:1], v{{[0-9]+}}{{$}}
+; The stored data register is left unpinned on VI; any VGPR number is accepted, not just one digit.
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16_i8_max_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ %value = bitcast i32 %arg to <2 x i16>
+ %hi = extractelement <2 x i16> %value, i32 1
+ %trunc = trunc i16 %hi to i8
+ %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 4095
+ store i8 %trunc, i8 addrspace(1)* %gep
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_global_hi_v2i16_i8_min_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: global_store_byte_d16_hi v[0:1], v2, off offset:-4095
+; NOTE(review): uses -4095 although the i16 min_offset test checks offset:-4096 - confirm intended.
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; The stored data register is left unpinned on VI; any VGPR number is accepted, not just one digit.
+; VI: flat_store_byte v[0:1], v{{[0-9]+}}{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_global_hi_v2i16_i8_min_offset(i8 addrspace(1)* %out, i32 %arg) #0 {
+entry:
+ %value = bitcast i32 %arg to <2 x i16>
+ %hi = extractelement <2 x i16> %value, i32 1
+ %trunc = trunc i16 %hi to i8
+ %gep = getelementptr inbounds i8, i8 addrspace(1)* %out, i64 -4095
+ store i8 %trunc, i8 addrspace(1)* %gep
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x i16> is stored as i16 through an addrspace(4) pointer.
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16(i16 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ store i16 %elt1, i16 addrspace(4)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2f16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x half> is stored as half through an addrspace(4) pointer.
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2f16(half addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x half>
+ %elt1 = extractelement <2 x half> %vec, i32 1
+ store half %elt1, half addrspace(4)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_i32_shift:
+; GCN: s_waitcnt
+; The upper 16 bits are produced with lshr 16 + trunc to i16, then stored via addrspace(4).
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_short v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_i32_shift(i16 addrspace(4)* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i16
+ store i16 %top, i16 addrspace(4)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is truncated to i8 and stored via addrspace(4).
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_byte v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16_i8(i8 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ store i8 %byte, i8 addrspace(4)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_i8_shift:
+; GCN: s_waitcnt
+; Bits 16..23 are produced with lshr 16 + trunc to i8, then stored via addrspace(4).
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; VI-NEXT: flat_store_byte v[0:1], v2
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_i8_shift(i8 addrspace(4)* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i8
+ store i8 %top, i8 addrspace(4)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2 offset:4094{{$}}
+; The gep advances 2047 i16 elements (4094 bytes), the immediate offset checked for gfx9 above.
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; VI: flat_store_short v[0:1], v2{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16_max_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %addr = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 2047
+ store i16 %elt1, i16 addrspace(4)* %addr
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16_neg_offset:
+; GCN: s_waitcnt
+; GCN: v_add_i32_e32
+; GCN: v_addc_u32_e32
+; The gep steps back 1023 i16 elements; both targets are expected to apply it with the adds above.
+; GFX9-NEXT: flat_store_short_d16_hi v[0:1], v2{{$}}
+; VI: flat_store_short v[0:1], v2{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16_neg_offset(i16 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %addr = getelementptr inbounds i16, i16 addrspace(4)* %out, i64 -1023
+ store i16 %elt1, i16 addrspace(4)* %addr
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2 offset:4095{{$}}
+; The gep advances 4095 bytes, the immediate offset checked for gfx9 above.
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; VI-DAG: v_add_i32_e32
+; VI-DAG: v_addc_u32_e32
+; VI: flat_store_byte v[0:1], v2{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16_i8_max_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ %addr = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 4095
+ store i8 %byte, i8 addrspace(4)* %addr
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_flat_hi_v2i16_i8_neg_offset:
+; GCN: s_waitcnt
+; GCN-DAG: v_add_i32_e32
+; GCN-DAG: v_addc_u32_e32
+; The gep steps back 4095 bytes; both targets are expected to apply it with the adds above.
+; GFX9-NEXT: flat_store_byte_d16_hi v[0:1], v2{{$}}
+; VI-DAG: v_lshrrev_b32_e32 v2, 16, v2
+; VI: flat_store_byte v[0:1], v2{{$}}
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_flat_hi_v2i16_i8_neg_offset(i8 addrspace(4)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ %addr = getelementptr inbounds i8, i8 addrspace(4)* %out, i64 -4095
+ store i8 %byte, i8 addrspace(4)* %addr
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_v2i16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x i16> is stored as i16 through a default-address-space pointer.
+; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2i16(i16* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ store i16 %elt1, i16* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_v2f16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x half> is stored as half through a default-address-space pointer.
+; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2f16(half* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x half>
+ %elt1 = extractelement <2 x half> %vec, i32 1
+ store half %elt1, half* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_i32_shift:
+; GCN: s_waitcnt
+; The upper 16 bits are produced with lshr 16 + trunc to i16, then stored through an i16* pointer.
+; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_i32_shift(i16* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i16
+ store i16 %top, i16* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_v2i16_i8:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is truncated to i8 and stored through an i8* pointer.
+; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2i16_i8(i8* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ store i8 %byte, i8* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_i8_shift:
+; GCN: s_waitcnt
+; Bits 16..23 are produced with lshr 16 + trunc to i8, then stored through an i8* pointer.
+; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], s4 offen{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; VI-NEXT: buffer_store_byte v1, v0, s[0:3], s4 offen{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_i8_shift(i8* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i8
+ store i8 %top, i8* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_private_hi_v2i16_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: buffer_store_short_d16_hi v1, v0, s[0:3], s4 offen offset:4094{{$}}
+; The gep advances 2047 i16 elements (4094 bytes); both targets are checked for that immediate offset.
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1
+; VI-NEXT: buffer_store_short v1, v0, s[0:3], s4 offen offset:4094{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2i16_max_offset(i16* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %addr = getelementptr inbounds i16, i16* %out, i64 2047
+ store i16 %elt1, i16* %addr
+ ret void
+}
+
+
+
+; GCN-LABEL: {{^}}store_private_hi_v2i16_nooff:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is stored volatile to a null i16*; checks expect "off".
+; GFX9-NEXT: buffer_store_short_d16_hi v0, off, s[0:3], s4{{$}}
+
+; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0
+; VI-NEXT: buffer_store_short v0, off, s[0:3], s4{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2i16_nooff(i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ store volatile i16 %elt1, i16* null
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}store_private_hi_v2i16_i8_nooff:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is truncated to i8 and stored volatile to a null i8*.
+; GFX9-NEXT: buffer_store_byte_d16_hi v0, off, s[0:3], s4{{$}}
+
+; VI: v_lshrrev_b32_e32 v0, 16, v0
+; VI: buffer_store_byte v0, off, s[0:3], s4{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_private_hi_v2i16_i8_nooff(i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ store volatile i8 %byte, i8* null
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_local_hi_v2i16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x i16> is stored as i16 through an addrspace(3) pointer.
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: ds_write_b16 v0, v1
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_local_hi_v2i16(i16 addrspace(3)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ store i16 %elt1, i16 addrspace(3)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_local_hi_v2f16:
+; GCN: s_waitcnt
+; Element 1 of %arg viewed as <2 x half> is stored as half through an addrspace(3) pointer.
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: ds_write_b16 v0, v1
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_local_hi_v2f16(half addrspace(3)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x half>
+ %elt1 = extractelement <2 x half> %vec, i32 1
+ store half %elt1, half addrspace(3)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_local_hi_i32_shift:
+; GCN: s_waitcnt
+; The upper 16 bits are produced with lshr 16 + trunc to i16, then stored via addrspace(3).
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: ds_write_b16 v0, v1
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_local_hi_i32_shift(i16 addrspace(3)* %out, i32 %value) #0 {
+entry:
+ %shifted = lshr i32 %value, 16
+ %top = trunc i32 %shifted to i16
+ store i16 %top, i16 addrspace(3)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_local_hi_v2i16_i8:
+; GCN: s_waitcnt
+; Element 1 of the <2 x i16> view of %arg is truncated to i8 and stored via addrspace(3).
+; GFX9-NEXT: ds_write_b8_d16_hi v0, v1{{$}}
+
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: ds_write_b8 v0, v1
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_local_hi_v2i16_i8(i8 addrspace(3)* %out, i32 %arg) #0 {
+entry:
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %byte = trunc i16 %elt1 to i8
+ store i8 %byte, i8 addrspace(3)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}store_local_hi_v2i16_max_offset:
+; GCN: s_waitcnt
+; GFX9-NEXT: ds_write_b16_d16_hi v0, v1 offset:65534{{$}}
+; The gep advances 32767 i16 elements (65534 bytes); both targets are checked for that immediate offset.
+; VI: v_lshrrev_b32_e32 v1, 16, v1
+; VI: ds_write_b16 v0, v1 offset:65534{{$}}
+
+; GCN-NEXT: s_waitcnt
+; GCN-NEXT: s_setpc_b64
+define void @store_local_hi_v2i16_max_offset(i16 addrspace(3)* %out, i32 %arg) #0 {
+entry:
+ ; FIXME: ABI for pre-gfx9
+ %vec = bitcast i32 %arg to <2 x i16>
+ %elt1 = extractelement <2 x i16> %vec, i32 1
+ %addr = getelementptr inbounds i16, i16 addrspace(3)* %out, i64 32767
+ store i16 %elt1, i16 addrspace(3)* %addr
+ ret void
+}
+
+attributes #0 = { nounwind } ; attribute group referenced by every function defined in this test
OpenPOWER on IntegriCloud