summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-05 19:47:23 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2016-02-05 19:47:23 +0000
commitcf84e26fb66e4ebab4a9338f4e59b768c612a0ca (patch)
treed2c8cdbaa26b784d881043b66e6714237fe9e907
parentfb9b6cd24b26a3e57b627be06415adf3a881435f (diff)
downloadbcm5719-llvm-cf84e26fb66e4ebab4a9338f4e59b768c612a0ca.tar.gz
bcm5719-llvm-cf84e26fb66e4ebab4a9338f4e59b768c612a0ca.zip
AMDGPU: Preserve alignments on new created globals
Also switch to internal linkage, and include the name of the function in the name. llvm-svn: 259911
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp12
-rw-r--r--llvm/test/CodeGen/AMDGPU/private-memory.ll28
-rw-r--r--llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll8
3 files changed, 41 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 9f085bfa09b..d0535500bc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -500,10 +500,18 @@ void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) {
DEBUG(dbgs() << "Promoting alloca to local memory\n");
LocalMemAvailable -= AllocaSize;
+ Function *F = I.getParent()->getParent();
+
Type *GVTy = ArrayType::get(I.getAllocatedType(), 256);
GlobalVariable *GV = new GlobalVariable(
- *Mod, GVTy, false, GlobalValue::ExternalLinkage, 0, I.getName(), 0,
- GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS);
+ *Mod, GVTy, false, GlobalValue::InternalLinkage,
+ UndefValue::get(GVTy),
+ Twine(F->getName()) + Twine('.') + I.getName(),
+ nullptr,
+ GlobalVariable::NotThreadLocal,
+ AMDGPUAS::LOCAL_ADDRESS);
+ GV->setUnnamedAddr(true);
+ GV->setAlignment(I.getAlignment());
Value *TCntY, *TCntZ;
diff --git a/llvm/test/CodeGen/AMDGPU/private-memory.ll b/llvm/test/CodeGen/AMDGPU/private-memory.ll
index e8ff45344d6..b87944d7051 100644
--- a/llvm/test/CodeGen/AMDGPU/private-memory.ll
+++ b/llvm/test/CodeGen/AMDGPU/private-memory.ll
@@ -10,6 +10,10 @@
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
+; HSAOPT: @mova_same_clause.stack = internal unnamed_addr addrspace(3) global [256 x [5 x i32]] undef, align 4
+; HSAOPT: @high_alignment.stack = internal unnamed_addr addrspace(3) global [256 x [8 x i32]] undef, align 16
+
+
; FUNC-LABEL: {{^}}mova_same_clause:
; OPT-LABEL: @mova_same_clause(
@@ -59,7 +63,7 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; HSAOPT: [[ADD_YZ_X_X_YZ_SIZE:%[0-9]+]] = add i32 [[YZ_X_XID]], [[Y_X_Z_SIZE]]
; HSAOPT: [[ADD_ZID:%[0-9]+]] = add i32 [[ADD_YZ_X_X_YZ_SIZE]], [[WORKITEM_ID_Z]]
-; HSAOPT: [[LOCAL_GEP:%[0-9]+]] = getelementptr inbounds [256 x [5 x i32]], [256 x [5 x i32]] addrspace(3)* @stack, i32 0, i32 [[ADD_ZID]]
+; HSAOPT: [[LOCAL_GEP:%[0-9]+]] = getelementptr inbounds [256 x [5 x i32]], [256 x [5 x i32]] addrspace(3)* @mova_same_clause.stack, i32 0, i32 [[ADD_ZID]]
; HSAOPT: %arrayidx1 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}}
; HSAOPT: %arrayidx3 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 {{%[0-9]+}}
; HSAOPT: %arrayidx10 = getelementptr inbounds [5 x i32], [5 x i32] addrspace(3)* [[LOCAL_GEP]], i32 0, i32 0
@@ -91,6 +95,28 @@ entry:
ret void
}
+; OPT-LABEL: @high_alignment(
+; OPT: getelementptr inbounds [256 x [8 x i32]], [256 x [8 x i32]] addrspace(3)* @high_alignment.stack, i32 0, i32 %{{[0-9]+}}
+define void @high_alignment(i32 addrspace(1)* nocapture %out, i32 addrspace(1)* nocapture %in) {
+entry:
+ %stack = alloca [8 x i32], align 16
+ %0 = load i32, i32 addrspace(1)* %in, align 4
+ %arrayidx1 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %0
+ store i32 4, i32* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 1
+ %1 = load i32, i32 addrspace(1)* %arrayidx2, align 4
+ %arrayidx3 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 %1
+ store i32 5, i32* %arrayidx3, align 4
+ %arrayidx10 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 0
+ %2 = load i32, i32* %arrayidx10, align 4
+ store i32 %2, i32 addrspace(1)* %out, align 4
+ %arrayidx12 = getelementptr inbounds [8 x i32], [8 x i32]* %stack, i32 0, i32 1
+ %3 = load i32, i32* %arrayidx12
+ %arrayidx13 = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 1
+ store i32 %3, i32 addrspace(1)* %arrayidx13
+ ret void
+}
+
; FUNC-LABEL: {{^}}no_replace_inbounds_gep:
; OPT-LABEL: @no_replace_inbounds_gep(
; OPT: alloca [5 x i32]
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
index 038ffd196cd..67a96a1c8e8 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-mem-intrinsics.ll
@@ -11,7 +11,7 @@ declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0
declare i32 @llvm.objectsize.i32.p0i8(i8*, i1) #1
; CHECK-LABEL: @promote_with_memcpy(
-; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca, i32 0, i32 %{{[0-9]+}}
+; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memcpy.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memcpy.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
define void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -25,7 +25,7 @@ define void @promote_with_memcpy(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}
; CHECK-LABEL: @promote_with_memmove(
-; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.1, i32 0, i32 %{{[0-9]+}}
+; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memmove.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memmove.p3i8.p1i8.i32(i8 addrspace(3)* %alloca.bc, i8 addrspace(1)* %in.bc, i32 68, i32 4, i1 false)
; CHECK: call void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* %out.bc, i8 addrspace(3)* %alloca.bc, i32 68, i32 4, i1 false)
define void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
@@ -39,7 +39,7 @@ define void @promote_with_memmove(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}
; CHECK-LABEL: @promote_with_memset(
-; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.2, i32 0, i32 %{{[0-9]+}}
+; CHECK: getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_memset.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call void @llvm.memset.p3i8.i32(i8 addrspace(3)* %alloca.bc, i8 7, i32 68, i32 4, i1 false)
define void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
%alloca = alloca [17 x i32], align 16
@@ -51,7 +51,7 @@ define void @promote_with_memset(i32 addrspace(1)* %out, i32 addrspace(1)* %in)
}
; CHECK-LABEL: @promote_with_objectsize(
-; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @alloca.3, i32 0, i32 %{{[0-9]+}}
+; CHECK: [[PTR:%[0-9]+]] = getelementptr inbounds [256 x [17 x i32]], [256 x [17 x i32]] addrspace(3)* @promote_with_objectsize.alloca, i32 0, i32 %{{[0-9]+}}
; CHECK: call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %alloca.bc, i1 false)
define void @promote_with_objectsize(i32 addrspace(1)* %out) #0 {
%alloca = alloca [17 x i32], align 16
OpenPOWER on IntegriCloud