summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/half.ll
diff options
context:
space:
mode:
authorTim Renouf <tpr.llvm@botech.co.uk>2019-03-21 12:01:21 +0000
committerTim Renouf <tpr.llvm@botech.co.uk>2019-03-21 12:01:21 +0000
commit361b5b2193421824925a72669f1d06cd63c3d9a7 (patch)
tree76cf94d30c3a4d9caf4150a96b93e988ac360445 /llvm/test/CodeGen/AMDGPU/half.ll
parent92cbcfc325e08c07d5b0d5157f95ec0c90124e70 (diff)
downloadbcm5719-llvm-361b5b2193421824925a72669f1d06cd63c3d9a7.tar.gz
bcm5719-llvm-361b5b2193421824925a72669f1d06cd63c3d9a7.zip
[AMDGPU] Support for v3i32/v3f32
Added support for dwordx3 for most load/store types, but not DS, and not intrinsics yet. SI (gfx6) does not have dwordx3 instructions, so they are not enabled there. Some of this patch is from Matt Arsenault, also of AMD. Differential Revision: https://reviews.llvm.org/D58902 Change-Id: I913ef54f1433a7149da8d72f4af54dbb13436bd9 llvm-svn: 356659
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/half.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/half.ll9
1 files changed, 4 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index 49b4d71bcf2..1908015f477 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -78,14 +78,13 @@ define amdgpu_kernel void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)*
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: s_load_dwordx2 s
-; GCN: s_load_dwordx2 s
-; GCN-NOT: _load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
+; GCN: s_load_dwordx2 s
+; GCN-NOT: _load
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
-; GCN-DAG: _store_dword
-; GCN-DAG: _store_dwordx2
+; GCN-DAG: _store_dwordx3
; GCN: s_endpgm
define amdgpu_kernel void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
%ext = fpext <3 x half> %arg to <3 x float>
@@ -472,7 +471,7 @@ define amdgpu_kernel void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace
}
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
-; GCN: flat_load_dwordx4
+; GCN: flat_load_dwordx3
; GCN-DAG: v_cvt_f16_f32_e32
; SI-DAG: v_cvt_f16_f32_e32
; VI-DAG: v_cvt_f16_f32_sdwa
OpenPOWER on IntegriCloud