diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 32 |
1 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index f839129fc3d..0600633aa27 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone @@ -12,7 +12,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone ; GCN: buffer_store_dword [[CONV]], define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid + %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid %load = load i8, i8 addrspace(1)* %gep, align 1 %cvt = uitofp i8 %load to float store float %cvt, float addrspace(1)* %out, align 4 @@ -26,7 +26,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid %load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2 %cvt = uitofp <2 x i8> %load to <2 x float> store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16 @@ -42,7 +42,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias ; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid %load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4 %cvt = uitofp <3 x i8> %load to <3 x float> store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16 @@ -60,7 +60,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias ; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}}, define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 @@ -86,7 +86,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 %cvt = uitofp <4 x i8> %load to <4 x float> store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16 @@ -135,7 +135,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n ; GCN: s_endpgm define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid %load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1 %cvt = uitofp <7 x i8> %load to <7 x float> store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16 @@ -160,7 +160,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias ; GCN: buffer_store_dwordx4 define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid %load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 @@ -174,7 +174,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias ; GCN: buffer_store_dword [[CONV]], define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %load = load i32, i32 addrspace(1)* %gep, align 4 %add = add i32 %load, 2 %inreg = and i32 %add, 255 @@ -186,7 +186,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias ; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32: define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %load = load i32, i32 addrspace(1)* %gep, align 4 %inreg = and i32 %load, 65280 %shr = lshr i32 %inreg, 8 @@ -200,7 +200,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias ; GCN-LABEL: {{^}}i8_zext_i32_to_f32: define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid + %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid %load = load i8, i8 addrspace(1)* %gep, align 1 %ext = zext i8 %load to i32 %cvt = uitofp i32 %ext to float @@ -211,7 +211,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, ; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32: define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid + %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid %load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1 %ext = zext <4 x i8> %load to <4 x i32> %cvt = uitofp <4 x i32> %ext to <4 x float> @@ -226,7 +226,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %and = and i32 %val, 255 %cvt = uitofp i32 %and to float @@ -241,7 +241,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 8 %and = and i32 %srl, 255 @@ -257,7 +257,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 16 %and = and i32 %srl, 255 @@ -273,7 +273,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out ; GCN: buffer_store_dword [[CONV]] define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind { %tid = call i32 @llvm.amdgcn.workitem.id.x() - %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid + %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid %val = load i32, i32 addrspace(1)* %gep %srl = lshr i32 %val, 24 %and = and i32 %srl, 255 |