diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-constant-i16.ll | 26 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-global-i16.ll | 20 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i16.ll | 10 |
3 files changed, 29 insertions, 27 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll index 31bb2067d76..23c9302bef7 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll @@ -29,7 +29,8 @@ entry: ; FUNC-LABEL: {{^}}constant_load_v3i16: ; GCN: s_load_dwordx2 s -; EG-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 2, #1 ; EG-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1 define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { entry: @@ -186,15 +187,11 @@ define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace( ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}}, ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}}, ; EG: CF_END -; EG-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 -; EG-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 -; TODO: This should use DST, but for some there are redundant MOVs -; EG-DAG: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EG-DAG: VTX_READ_16 [[ST_LO]].X, [[SRC:T[0-9]\.[XYZW]]], 0, #1 +; EG-DAG: VTX_READ_16 {{T[0-9]\.[XYZW]}}, [[SRC]], 2, #1 +; EG-DAG: VTX_READ_16 [[ST_HI]].X, [[SRC]], 4, #1 +; EG-DAG: LSHR {{[* ]*}}{{T[0-9]\.[XYZW]}}, {{T[0-9]\.[XYZW]}}, literal ; EG-DAG: 16 -; EG-DAG: AND_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, literal -; EG-DAG: AND_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, literal -; EG-DAG: 65535 -; EG-DAG: 65535 define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in @@ -209,11 +206,12 @@ entry: ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9].[XYZW]}}, ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9].[XYZW]}}, ; v3i16 is naturally 8 byte aligned -; EG-DAG: VTX_READ_32 [[DST_HI:T[0-9]\.[XYZW]]], [[PTR:T[0-9]\.[XYZW]]], 0, #1 -; EG-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 -; EG-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal -; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal -; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal +; EG-DAG: VTX_READ_16 [[ST_LO]].X, [[SRC:T[0-9]\.[XYZW]]], 0, #1 +; EG-DAG: VTX_READ_16 [[DST_MID:T[0-9]\.[XYZW]]], [[SRC]], 2, #1 +; EG-DAG: VTX_READ_16 [[ST_HI]].X, [[SRC]], 4, #1 +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, [[ST_LO]].X, 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, [[DST_MID]], 0.0, literal +; EG-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, [[ST_HI]].X, 0.0, literal ; EG-DAG: 16 ; EG-DAG: 16 define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll index 703357beb52..6b0b4ea1224 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll @@ -34,7 +34,8 @@ entry: ; GCN-NOHSA: buffer_load_dwordx2 v ; GCN-HSA: flat_load_dwordx2 v -; EGCM-DAG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1 +; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 2, #1 ; EGCM-DAG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 4, #1 define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: @@ -195,10 +196,9 @@ define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1) ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, -; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 0, #1 -; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9]\.[XYZW]}}, 4, #1 -; TODO: This should use DST, but for some there are redundant MOVs -; EGCM: LSHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal +; EGCM-DAG: VTX_READ_16 [[ST_LO]].X, [[SRC:T[0-9]\.[XYZW]]], 0, #1 +; EGCM-DAG: VTX_READ_16 {{T[0-9]\.[XYZW]}}, [[SRC]], 2, #1 +; EGCM-DAG: VTX_READ_16 [[ST_HI]].X, [[SRC]], 4, #1 ; EGCM: 16 define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { entry: @@ -216,11 +216,11 @@ entry: ; CM: MEM_RAT_CACHELESS STORE_DWORD [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}} ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_HI:T[0-9]]].X, {{T[0-9]\.[XYZW]}}, ; EG: MEM_RAT_CACHELESS STORE_RAW [[ST_LO:T[0-9]]].XY, {{T[0-9]\.[XYZW]}}, -; EGCM-DAG: VTX_READ_32 [[DST_LO:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 0, #1 -; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], {{T[0-9].[XYZW]}}, 4, #1 -; TODO: This should use DST, but for some there are redundant MOVs -; EGCM-DAG: ASHR {{[* ]*}}[[ST_LO]].Y, {{T[0-9]\.[XYZW]}}, literal -; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, {{T[0-9]\.[XYZW]}}, 0.0, literal +; EGCM-DAG: VTX_READ_16 [[DST_LO:T[0-9]\.[XYZW]]], [[SRC:T[0-9].[XYZW]]], 0, #1 +; EGCM-DAG: VTX_READ_16 [[DST_MID:T[0-9]\.[XYZW]]], [[SRC]], 2, #1 +; EGCM-DAG: VTX_READ_16 [[DST_HI:T[0-9]\.[XYZW]]], [[SRC]], 4, #1 +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].X, [[DST_LO]], 0.0, literal +; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_LO]].Y, [[DST_MID]], 0.0, literal ; EGCM-DAG: BFE_INT {{[* ]*}}[[ST_HI]].X, [[DST_HI]], 0.0, literal ; EGCM-DAG: 16 ; EGCM-DAG: 16 diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll index 5913e7275e5..d8d7d98e308 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i16.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i16.ll @@ -52,7 +52,7 @@ entry: ; GCN-DAG: ds_write_b16 ; EG-DAG: LDS_USHORT_READ_RET -; EG-DAG: LDS_READ_RET +; EG-DAG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in @@ -235,7 +235,9 @@ define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b64 -; EG: LDS_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { entry: %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in @@ -252,7 +254,9 @@ entry: ; GCN-DAG: ds_write_b32 ; GCN-DAG: ds_write_b64 -; EG: LDS_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET +; EG: LDS_USHORT_READ_RET ; EG-DAG: BFE_INT ; EG-DAG: BFE_INT ; EG-DAG: BFE_INT |

