summaryrefslogtreecommitdiffstats
path: root/llvm/test
diff options
context:
space:
mode:
authorTim Renouf <tpr.llvm@botech.co.uk>2018-08-21 11:08:12 +0000
committerTim Renouf <tpr.llvm@botech.co.uk>2018-08-21 11:08:12 +0000
commitbb5ee41ab41bf0db99c2cea4c16c7eaeb0339965 (patch)
treed97ee50e5e74c1f3f61c1c929989ac3ae857f180 /llvm/test
parent4f703f5e116f49d369d0bf9fa9551382a4720e04 (diff)
downloadbcm5719-llvm-bb5ee41ab41bf0db99c2cea4c16c7eaeb0339965.tar.gz
bcm5719-llvm-bb5ee41ab41bf0db99c2cea4c16c7eaeb0339965.zip
[AMDGPU] Allow int types for MUBUF vdata
Summary: Previously the new llvm.amdgcn.raw/struct.buffer.load/store intrinsics only allowed float types for the data to be loaded or stored, which sometimes meant the frontend needed to generate a bitcast. In this, the new intrinsics copied the old buffer intrinsics. This commit extends the new intrinsics to allow int types as well. Subscribers: arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D50315 Change-Id: I8202af2d036455553681dcbb3d7d32ae273f8f85 llvm-svn: 340270
Diffstat (limited to 'llvm/test')
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll22
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll16
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll22
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll16
4 files changed, 76 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
index 797700e5b4f..76695077f00 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll
@@ -198,9 +198,31 @@ main_body:
ret void
}
+;CHECK-LABEL: {{^}}buffer_load_int:
+;CHECK: buffer_load_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
+;CHECK: buffer_load_dword v6, off, s[0:3], 0 slc
+;CHECK: s_waitcnt
+define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) {
+main_body:
+ %data = call <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0)
+ %data_glc = call <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32> %0, i32 0, i32 0, i32 1)
+ %data_slc = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %0, i32 0, i32 0, i32 2)
+ %fdata = bitcast <4 x i32> %data to <4 x float>
+ %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
+ %fdata_slc = bitcast i32 %data_slc to float
+ %r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
+ %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
+ %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
+ ret {<4 x float>, <2 x float>, float} %r2
+}
+
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #0
+declare <2 x i32> @llvm.amdgcn.raw.buffer.load.v2i32(<4 x i32>, i32, i32, i32) #0
+declare <4 x i32> @llvm.amdgcn.raw.buffer.load.v4i32(<4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
index 64865115966..afb2ef803f0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.store.ll
@@ -142,9 +142,25 @@ define amdgpu_ps void @buffer_store_x2_offset_merged(<4 x i32> inreg %rsrc, <2 x
ret void
}
+;CHECK-LABEL: {{^}}buffer_store_int:
+;CHECK-NOT: s_waitcnt
+;CHECK: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+;CHECK: buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
+;CHECK: buffer_store_dword v6, off, s[0:3], 0 slc
+define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
+main_body:
+ call void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 1)
+ call void @llvm.amdgcn.raw.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 2)
+ ret void
+}
+
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
+declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0
+declare void @llvm.amdgcn.raw.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32) #0
+declare void @llvm.amdgcn.raw.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #1
attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
index cd373e0796b..71fbdaa71b2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll
@@ -125,9 +125,31 @@ entry:
ret float %val
}
+;CHECK-LABEL: {{^}}buffer_load_int:
+;CHECK: buffer_load_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen
+;CHECK: buffer_load_dwordx2 v[4:5], {{v[0-9]+}}, s[0:3], 0 idxen glc
+;CHECK: buffer_load_dword v6, {{v[0-9]+}}, s[0:3], 0 idxen slc
+;CHECK: s_waitcnt
+define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(<4 x i32> inreg) {
+main_body:
+ %data = call <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
+ %data_glc = call <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
+ %data_slc = call i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
+ %fdata = bitcast <4 x i32> %data to <4 x float>
+ %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
+ %fdata_slc = bitcast i32 %data_slc to float
+ %r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
+ %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
+ %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
+ ret {<4 x float>, <2 x float>, float} %r2
+}
+
declare float @llvm.amdgcn.struct.buffer.load.f32(<4 x i32>, i32, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.struct.buffer.load.v2f32(<4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.struct.buffer.load.i32(<4 x i32>, i32, i32, i32, i32) #0
+declare <2 x i32> @llvm.amdgcn.struct.buffer.load.v2i32(<4 x i32>, i32, i32, i32, i32) #0
+declare <4 x i32> @llvm.amdgcn.struct.buffer.load.v4i32(<4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
index 8e05c43889a..738bb16e7d6 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.store.ll
@@ -95,9 +95,25 @@ main_body:
ret void
}
+;CHECK-LABEL: {{^}}buffer_store_int:
+;CHECK-NOT: s_waitcnt
+;CHECK: buffer_store_dwordx4 v[0:3], {{v[0-9]+}}, s[0:3], 0 idxen
+;CHECK: buffer_store_dwordx2 v[4:5], {{v[0-9]+}}, s[0:3], 0 idxen glc
+;CHECK: buffer_store_dword v6, {{v[0-9]+}}, s[0:3], 0 idxen slc
+define amdgpu_ps void @buffer_store_int(<4 x i32> inreg, <4 x i32>, <2 x i32>, i32) {
+main_body:
+ call void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32> %1, <4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
+ call void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32> %2, <4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
+ call void @llvm.amdgcn.struct.buffer.store.i32(i32 %3, <4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
+ ret void
+}
+
declare void @llvm.amdgcn.struct.buffer.store.f32(float, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32, i32) #0
declare void @llvm.amdgcn.struct.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32, i32) #0
+declare void @llvm.amdgcn.struct.buffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32) #0
+declare void @llvm.amdgcn.struct.buffer.store.v2i32(<2 x i32>, <4 x i32>, i32, i32, i32, i32) #0
+declare void @llvm.amdgcn.struct.buffer.store.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.buffer.load.v4f32(<4 x i32>, i32, i32, i32, i32) #1
attributes #0 = { nounwind }
OpenPOWER on IntegriCloud