summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll32
1 files changed, 16 insertions, 16 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
index f839129fc3d..0600633aa27 100644
--- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -12,7 +12,7 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
%load = load i8, i8 addrspace(1)* %gep, align 1
%cvt = uitofp i8 %load to float
store float %cvt, float addrspace(1)* %out, align 4
@@ -26,7 +26,7 @@ define amdgpu_kernel void @load_i8_to_f32(float addrspace(1)* noalias %out, i8 a
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias %out, <2 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <2 x i8>, <2 x i8> addrspace(1)* %in, i32 %tid
%load = load <2 x i8>, <2 x i8> addrspace(1)* %gep, align 2
%cvt = uitofp <2 x i8> %load to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out, align 16
@@ -42,7 +42,7 @@ define amdgpu_kernel void @load_v2i8_to_v2f32(<2 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx2 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias %out, <3 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <3 x i8>, <3 x i8> addrspace(1)* %in, i32 %tid
%load = load <3 x i8>, <3 x i8> addrspace(1)* %gep, align 4
%cvt = uitofp <3 x i8> %load to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out, align 16
@@ -60,7 +60,7 @@ define amdgpu_kernel void @load_v3i8_to_v3f32(<3 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx4 v{{\[}}[[LORESULT]]:[[HIRESULT]]{{\]}},
define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 4
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@@ -86,7 +86,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32(<4 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v4i8_to_v4f32_unaligned(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
%cvt = uitofp <4 x i8> %load to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out, align 16
@@ -135,7 +135,7 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(<4 x float> addrspace(1)* n
; GCN: s_endpgm
define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <7 x i8>, <7 x i8> addrspace(1)* %in, i32 %tid
%load = load <7 x i8>, <7 x i8> addrspace(1)* %gep, align 1
%cvt = uitofp <7 x i8> %load to <7 x float>
store <7 x float> %cvt, <7 x float> addrspace(1)* %out, align 16
@@ -160,7 +160,7 @@ define amdgpu_kernel void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias
; GCN: buffer_store_dwordx4
define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <8 x i8>, <8 x i8> addrspace(1)* %in, i32 %tid
%load = load <8 x i8>, <8 x i8> addrspace(1)* %gep, align 8
%cvt = uitofp <8 x i8> %load to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16
@@ -174,7 +174,7 @@ define amdgpu_kernel void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias
; GCN: buffer_store_dword [[CONV]],
define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%load = load i32, i32 addrspace(1)* %gep, align 4
%add = add i32 %load, 2
%inreg = and i32 %add, 255
@@ -186,7 +186,7 @@ define amdgpu_kernel void @i8_zext_inreg_i32_to_f32(float addrspace(1)* noalias
; GCN-LABEL: {{^}}i8_zext_inreg_hi1_to_f32:
define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%load = load i32, i32 addrspace(1)* %gep, align 4
%inreg = and i32 %load, 65280
%shr = lshr i32 %inreg, 8
@@ -200,7 +200,7 @@ define amdgpu_kernel void @i8_zext_inreg_hi1_to_f32(float addrspace(1)* noalias
; GCN-LABEL: {{^}}i8_zext_i32_to_f32:
define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out, i8 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i8, i8 addrspace(1)* %in, i32 %tid
%load = load i8, i8 addrspace(1)* %gep, align 1
%ext = zext i8 %load to i32
%cvt = uitofp i32 %ext to float
@@ -211,7 +211,7 @@ define amdgpu_kernel void @i8_zext_i32_to_f32(float addrspace(1)* noalias %out,
; GCN-LABEL: {{^}}v4i8_zext_v4i32_to_v4f32:
define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* noalias %out, <4 x i8> addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
+ %gep = getelementptr <4 x i8>, <4 x i8> addrspace(1)* %in, i32 %tid
%load = load <4 x i8>, <4 x i8> addrspace(1)* %gep, align 1
%ext = zext <4 x i8> %load to <4 x i32>
%cvt = uitofp <4 x i32> %ext to <4 x float>
@@ -226,7 +226,7 @@ define amdgpu_kernel void @v4i8_zext_v4i32_to_v4f32(<4 x float> addrspace(1)* no
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%and = and i32 %val, 255
%cvt = uitofp i32 %and to float
@@ -241,7 +241,7 @@ define amdgpu_kernel void @extract_byte0_to_f32(float addrspace(1)* noalias %out
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 8
%and = and i32 %srl, 255
@@ -257,7 +257,7 @@ define amdgpu_kernel void @extract_byte1_to_f32(float addrspace(1)* noalias %out
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 16
%and = and i32 %srl, 255
@@ -273,7 +273,7 @@ define amdgpu_kernel void @extract_byte2_to_f32(float addrspace(1)* noalias %out
; GCN: buffer_store_dword [[CONV]]
define amdgpu_kernel void @extract_byte3_to_f32(float addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in) nounwind {
%tid = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
%val = load i32, i32 addrspace(1)* %gep
%srl = lshr i32 %val, 24
%and = and i32 %srl, 255
OpenPOWER on IntegriCloud