diff options
author | Justin Bogner <mail@justinbogner.com> | 2019-03-27 20:35:56 +0000 |
---|---|---|
committer | Justin Bogner <mail@justinbogner.com> | 2019-03-27 20:35:56 +0000 |
commit | b1650f0da92bc9256627a1a692f847c6e1b1d210 (patch) | |
tree | c9fe46d35b9eb80d5b4434b2309cdcd766da0916 /llvm/test/CodeGen/AMDGPU/load-local-i16.ll | |
parent | ee9f2ae5b913cf571997091c4d7cac99eccd29a0 (diff) | |
download | bcm5719-llvm-b1650f0da92bc9256627a1a692f847c6e1b1d210.tar.gz bcm5719-llvm-b1650f0da92bc9256627a1a692f847c6e1b1d210.zip |
[LegalizeVectorTypes] Allow single loads and stores for more short vectors
When lowering a load or store for TypeWidenVector, the type legalizer
would use a single load or store if the associated integer type was legal
or promoted. E.g. it loads a v4i8 as an i32 if i32 is legal/promotable.
(See https://reviews.llvm.org/rL236528 for reference.)
This applies that behaviour to vector types. If the vector type is
TypePromoteInteger, the element type is going to be TypePromoteInteger
as well, which will lead to have a single promoting load rather than N
individual promoting loads. For instance, if we have a v3i1, we would
now have a load of v4i1 instead of 3 loads of i1.
Patch by Guillaume Marques. Thanks!
Differential Revision: https://reviews.llvm.org/D56201
llvm-svn: 357120
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/load-local-i16.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i16.ll | 10 |
1 files changed, 7 insertions, 3 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll index 5913e7275e5..d8d7d98e308 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll @@ -52,7 +52,7 @@ entry: ; GCN-DAG: ds_write_b16 ; EG-DAG: LDS_USHORT_READ_RET -; EG-DAG: LDS_READ_RET +; EG-DAG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in @@ -235,7 +235,9 @@ define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b64 -; EG: LDS_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in @@ -252,7 +254,9 @@ entry: ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b64 -; EG: LDS_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET ; EG-DAG: BFE_INT ; EG-DAG: BFE_INT ; EG-DAG: BFE_INT |