Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/always-uniform.ll  | 21 |
-rw-r--r-- | llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll | 33 |
2 files changed, 54 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/always-uniform.ll b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
new file mode 100644
index 00000000000..4ba57fba81b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/always-uniform.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.readfirstlane(i32)
+
+; GCN-LABEL: readfirstlane_uniform
+; GCN: s_load_dwordx2 s{{\[}}[[IN_ADDR:[0-9]+]]:1{{\]}}, s[4:5], 0x0
+; GCN: v_readfirstlane_b32 s[[SCALAR:[0-9]+]], v0
+; GCN: s_add_u32 s[[LOAD_ADDR:[0-9]+]], s[[IN_ADDR]], s[[SCALAR]]
+; GCN: s_load_dword s{{[0-9]+}}, s{{\[}}[[LOAD_ADDR]]
+
+define amdgpu_kernel void @readfirstlane_uniform(float addrspace(1)* noalias nocapture readonly, float addrspace(1)* noalias nocapture readonly) {
+  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %scalar = tail call i32 @llvm.amdgcn.readfirstlane(i32 %tid)
+  %idx = zext i32 %scalar to i64
+  %gep0 = getelementptr inbounds float, float addrspace(1)* %0, i64 %idx
+  %val = load float, float addrspace(1)* %gep0, align 4
+  %gep1 = getelementptr inbounds float, float addrspace(1)* %1, i64 10
+  store float %val, float addrspace(1)* %gep1, align 4
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
index a6a04151caa..be6e3fd05ae 100644
--- a/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
+++ b/llvm/test/CodeGen/AMDGPU/global_smrd_cfg.ll
@@ -72,6 +72,39 @@ bb22:                                             ; preds = %bb20, %bb11
   br i1 %tmp31, label %bb7, label %bb11
 }
 
+; one more test to ensure that aliasing store after the load
+; is considered clobbering if load parent block is the same
+; as a loop header block.
+
+; CHECK-LABEL: %bb1
+
+; Load from %arg has alias store that is after the load
+; but is considered clobbering because of the loop.
+
+; CHECK: flat_load_dword
+
+define amdgpu_kernel void @cfg_selfloop(i32 addrspace(1)* nocapture readonly %arg, i32 addrspace(1)* nocapture %arg1, i32 %arg2) #0 {
+bb:
+  br label %bb1
+
+bb2:
+  ret void
+
+bb1:
+  %tmp13 = phi i32 [ %tmp25, %bb1 ], [ 0, %bb ]
+  %tmp14 = srem i32 %tmp13, %arg2
+  %tmp15 = sext i32 %tmp14 to i64
+  %tmp16 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp15
+  %tmp17 = load i32, i32 addrspace(1)* %tmp16, align 4, !tbaa !0
+  %tmp19 = sext i32 %tmp13 to i64
+  %tmp21 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 %tmp19
+  store i32 %tmp17, i32 addrspace(1)* %tmp21, align 4, !tbaa !0
+  %tmp25 = add nuw nsw i32 %tmp13, 1
+  %tmp31 = icmp eq i32 %tmp25, 100
+  br i1 %tmp31, label %bb2, label %bb1
+}
+
+
 attributes #0 = { "target-cpu"="fiji" }
 
 !0 = !{!1, !1, i64 0}
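Both files are ordinary lit regression tests, so they can be exercised from an LLVM build tree. A minimal sketch, assuming a build directory named ./build with the AMDGPU target enabled (the path is an assumption, not part of the patch):

# Run the new test through lit (assumed build dir: ./build)
./build/bin/llvm-lit -v llvm/test/CodeGen/AMDGPU/always-uniform.ll

# Or reproduce the test's RUN line by hand, piping llc output into FileCheck
./build/bin/llc -mtriple amdgcn-amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads -verify-machineinstrs \
  < llvm/test/CodeGen/AMDGPU/always-uniform.ll \
  | ./build/bin/FileCheck -check-prefix=GCN llvm/test/CodeGen/AMDGPU/always-uniform.ll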