summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td48
-rw-r--r--llvm/lib/Target/AMDGPU/SIIntrinsics.td14
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll54
-rw-r--r--llvm/test/CodeGen/AMDGPU/mubuf.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll4
5 files changed, 5 insertions, 121 deletions
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index e48b73b0f1e..af45028cb26 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1422,54 +1422,6 @@ defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SHORT_D16_OFFEN, BUFFER_LOAD_SHORT_D
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_UBYTE_D16_OFFEN, BUFFER_LOAD_UBYTE_D16_OFFSET, i16, az_extloadi8_private>;
defm : MUBUFScratchLoadPat_Lo16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D16_OFFSET, i16, sextloadi8_private>;
}
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt,
- MUBUF_Pseudo offset,
- MUBUF_Pseudo offen,
- MUBUF_Pseudo idxen,
- MUBUF_Pseudo bothen> {
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : GCNPat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is forst
diff --git a/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/llvm/lib/Target/AMDGPU/SIIntrinsics.td
index 7b7cf163505..3892f19d3df 100644
--- a/llvm/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -34,18 +34,4 @@ let TargetPrefix = "SI", isTarget = 1 in {
llvm_i32_ty], // tfe(imm)
[]>;
- // Fully-flexible BUFFER_LOAD_DWORD_* except for the ADDR64 bit, which is not exposed
- def int_SI_buffer_load_dword : Intrinsic <
- [llvm_anyint_ty], // vdata(VGPR), overloaded for types i32, v2i32, v4i32
- [llvm_anyint_ty, // rsrc(SGPR)
- llvm_anyint_ty, // vaddr(VGPR)
- llvm_i32_ty, // soffset(SGPR)
- llvm_i32_ty, // inst_offset(imm)
- llvm_i32_ty, // offen(imm)
- llvm_i32_ty, // idxen(imm)
- llvm_i32_ty, // glc(imm)
- llvm_i32_ty, // slc(imm)
- llvm_i32_ty], // tfe(imm)
- [IntrReadMem, IntrArgMemOnly]>;
-
} // End TargetPrefix = "SI", isTarget = 1
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll b/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
deleted file mode 100644
index 1d370aba6da..00000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.SI.load.dword.ll
+++ /dev/null
@@ -1,54 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs < %s | FileCheck %s
-
-; Example of a simple geometry shader loading vertex attributes from the
-; ESGS ring buffer
-
-; FIXME: Out of bounds immediate offset crashes
-
-; CHECK-LABEL: {{^}}main:
-; CHECK: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen glc slc
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen offen glc slc
-; CHECK: s_movk_i32 [[K:s[0-9]+]], 0x4d2 ; encoding
-; CHECK: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, [[K]] idxen offen offset:65535 glc slc
-
-define amdgpu_vs void @main([17 x <4 x i32>] addrspace(4)* byval %arg, [32 x <4 x i32>] addrspace(4)* byval %arg1, [16 x <32 x i8>] addrspace(4)* byval %arg2, [2 x <4 x i32>] addrspace(4)* byval %arg3, [17 x <4 x i32>] addrspace(4)* inreg %arg4, [17 x <4 x i32>] addrspace(4)* inreg %arg5, i32 %arg6, i32 %arg7, i32 %arg8, i32 %arg9) {
-main_body:
- %tmp = getelementptr [2 x <4 x i32>], [2 x <4 x i32>] addrspace(4)* %arg3, i64 0, i32 1
- %tmp10 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp, !tbaa !0
- %tmp11 = shl i32 %arg6, 2
- %tmp12 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0)
- %tmp13 = bitcast i32 %tmp12 to float
- %tmp14 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 1, i32 0, i32 1, i32 1, i32 0)
- %tmp15 = bitcast i32 %tmp14 to float
- %tmp16 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp10, i32 %tmp11, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 0)
- %tmp17 = bitcast i32 %tmp16 to float
- %tmp18 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp19 = bitcast i32 %tmp18 to float
-
- %tmp20 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 0, i32 123, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp21 = bitcast i32 %tmp20 to float
-
- %tmp22 = call i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32> %tmp10, <2 x i32> zeroinitializer, i32 1234, i32 65535, i32 1, i32 1, i32 1, i32 1, i32 0)
- %tmp23 = bitcast i32 %tmp22 to float
-
- call void @llvm.amdgcn.exp.f32(i32 15, i32 12, float %tmp13, float %tmp15, float %tmp17, float %tmp19, i1 false, i1 false)
- call void @llvm.amdgcn.exp.f32(i32 15, i32 12, float %tmp21, float %tmp23, float %tmp23, float %tmp23, i1 true, i1 false)
- ret void
-}
-
-; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readonly
-declare i32 @llvm.SI.buffer.load.dword.i32.v2i32(<4 x i32>, <2 x i32>, i32, i32, i32, i32, i32, i32, i32) #0
-
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind inaccessiblememonly }
-
-!0 = !{!"const", !1, i32 1}
-!1 = !{!"tbaa root"}
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index d5c22d22cca..9cc48b41d6d 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -60,7 +60,7 @@ main_body:
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32> %tmp1, i32 %tmp2, i32 64, i32 1)
%tmp4 = add i32 %6, 16
%tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
@@ -79,7 +79,7 @@ main_body:
%tmp0 = getelementptr [6 x <4 x i32>], [6 x <4 x i32>] addrspace(4)* %0, i32 0, i32 0
%tmp1 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp0
%tmp2 = shl i32 %6, 2
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32> %tmp1, i32 %tmp2, i32 65, i32 1)
%tmp4 = add i32 %6, 16
%tmp1.4xi32 = bitcast <4 x i32> %tmp1 to <4 x i32>
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> %tmp1.4xi32, i32 0, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i1 1, i1 1)
@@ -176,7 +176,7 @@ define amdgpu_kernel void @store_vgpr_ptr(i32 addrspace(1)* %out) #0 {
ret void
}
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare i32 @llvm.amdgcn.raw.buffer.load.dword.i32(<4 x i32>, i32, i32, i32) #0
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1)
attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
index 5edc2c5c9b7..d93cde6a885 100644
--- a/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/scheduler-subrange-crash.ll
@@ -24,7 +24,7 @@ main_body:
%array_vector9 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, float %tmp1, i32 1
%array_vector10 = insertelement <4 x float> %array_vector9, float 0.000000e+00, i32 2
%array_vector11 = insertelement <4 x float> %array_vector10, float undef, i32 3
- %tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32> undef, i32 undef, i32 4864, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
+ %tmp3 = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> undef, i32 undef, i32 4864, i32 0)
call void @llvm.amdgcn.tbuffer.store.i32(i32 %tmp3, <4 x i32> undef, i32 0, i32 0, i32 %arg, i32 36, i32 4, i32 4, i1 1, i1 1)
%bc = bitcast <4 x float> %array_vector3 to <4 x i32>
%tmp4 = extractelement <4 x i32> %bc, i32 undef
@@ -46,7 +46,7 @@ main_body:
}
declare float @llvm.SI.load.const.v4i32(<4 x i32>, i32) #1
-declare i32 @llvm.SI.buffer.load.dword.i32.i32(<4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #2
+declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.tbuffer.store.i32(i32, <4 x i32>, i32, i32, i32, i32, i32, i32, i1, i1) #3
attributes #0 = { nounwind "target-cpu"="tonga" }
OpenPOWER on IntegriCloud