summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
authorYaxun Liu <Yaxun.Liu@amd.com>2017-11-10 01:53:24 +0000
committerYaxun Liu <Yaxun.Liu@amd.com>2017-11-10 01:53:24 +0000
commit920cc2f813c67e31fd3d2b2a92b81f037180243b (patch)
tree1cfd2e69924368edc86ab621bf278f5637e63316 /llvm
parent9278019eb34f787b92b26d44e1b1a1458b29a29b (diff)
downloadbcm5719-llvm-920cc2f813c67e31fd3d2b2a92b81f037180243b.tar.gz
bcm5719-llvm-920cc2f813c67e31fd3d2b2a92b81f037180243b.zip
[AMDGPU] Fix pointer info for pseudo source for r600
The pointer info for pseudo source for r600 is not correct when alloca addr space is not 0, which causes invalid SDNode for r600---amdgiz. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861
Diffstat (limited to 'llvm')
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp18
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.h3
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-constant-i1.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-global-i1.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/load-local-i1.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/vector-alloca.ll157
6 files changed, 109 insertions, 87 deletions
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 15dcf650d9a..9b78bc88ff3 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -1493,3 +1493,21 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
FlagOp.setImm(InstFlags);
}
}
+
+unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
+ PseudoSourceValue::PSVKind Kind) const {
+ switch (Kind) {
+ case PseudoSourceValue::Stack:
+ case PseudoSourceValue::FixedStack:
+ return AMDGPUASI.PRIVATE_ADDRESS;
+ case PseudoSourceValue::ConstantPool:
+ case PseudoSourceValue::GOT:
+ case PseudoSourceValue::JumpTable:
+ case PseudoSourceValue::GlobalValueCallEntry:
+ case PseudoSourceValue::ExternalSymbolCallEntry:
+ case PseudoSourceValue::TargetCustom:
+ return AMDGPUASI.CONSTANT_ADDRESS;
+ }
+ llvm_unreachable("Invalid pseudo source kind");
+ return AMDGPUASI.PRIVATE_ADDRESS;
+}
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index 3b828006807..abaa3745075 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -318,6 +318,9 @@ public:
bool isRegisterLoad(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
}
+
+ unsigned getAddressSpaceForPseudoSourceKind(
+ PseudoSourceValue::PSVKind Kind) const override;
};
namespace AMDGPU {
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
index c8abe5c77ee..789b5e9734d 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}constant_load_i1:
; GCN: buffer_load_ubyte
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
index cb3536a0c12..b35922bac23 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}global_load_i1:
; GCN: buffer_load_ubyte
diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
index e8f134b1fb2..089ac371169 100644
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; FUNC-LABEL: {{^}}local_load_i1:
; GCN: ds_read_u8
diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
index a0aac8c1d9b..fbdfe48cd38 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
@@ -1,9 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
+; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+target datalayout = "A5"
; OPT-LABEL: @vector_read(
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
@@ -17,17 +18,17 @@
; EG: MOVA_INT
define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
- store i32 0, i32* %x
- store i32 1, i32* %y
- store i32 2, i32* %z
- store i32 3, i32* %w
- %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
- %tmp2 = load i32, i32* %tmp1
+ %tmp = alloca [4 x i32], addrspace(5)
+ %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 1, i32 addrspace(5)* %y
+ store i32 2, i32 addrspace(5)* %z
+ store i32 3, i32 addrspace(5)* %w
+ %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+ %tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
@@ -46,19 +47,19 @@ entry:
; EG: MOVA_INT
define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
- %tmp = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
- store i32 0, i32* %x
- store i32 0, i32* %y
- store i32 0, i32* %z
- store i32 0, i32* %w
- %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
- store i32 1, i32* %tmp1
- %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
- %tmp3 = load i32, i32* %tmp2
+ %tmp = alloca [4 x i32], addrspace(5)
+ %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 0, i32 addrspace(5)* %y
+ store i32 0, i32 addrspace(5)* %z
+ store i32 0, i32 addrspace(5)* %w
+ %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index
+ store i32 1, i32 addrspace(5)* %tmp1
+ %tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index
+ %tmp3 = load i32, i32 addrspace(5)* %tmp2
store i32 %tmp3, i32 addrspace(1)* %out
ret void
}
@@ -73,19 +74,19 @@ entry:
; EG: STORE_RAW
define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
entry:
- %tmp = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
- store i32 0, i32* %x
- store i32 0, i32* %y
- store i32 0, i32* %z
- store i32 0, i32* %w
- %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %tmp2 = bitcast i32* %tmp1 to [4 x i32]*
- %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
- %tmp4 = load i32, i32* %tmp3
+ %tmp = alloca [4 x i32], addrspace(5)
+ %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 0, i32 addrspace(5)* %y
+ store i32 0, i32 addrspace(5)* %z
+ store i32 0, i32 addrspace(5)* %w
+ %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
+ %tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
+ %tmp4 = load i32, i32 addrspace(5)* %tmp3
store i32 %tmp4, i32 addrspace(1)* %out
ret void
}
@@ -95,18 +96,18 @@ entry:
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [4 x i32]
- %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
- %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
- %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
- %bc = bitcast i32* %x to float*
- store float 1.0, float* %bc
- store i32 1, i32* %y
- store i32 2, i32* %z
- store i32 3, i32* %w
- %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
- %tmp2 = load i32, i32* %tmp1
+ %tmp = alloca [4 x i32], addrspace(5)
+ %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+ %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+ %bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*
+ store float 1.0, float addrspace(5)* %bc
+ store i32 1, i32 addrspace(5)* %y
+ store i32 2, i32 addrspace(5)* %z
+ store i32 3, i32 addrspace(5)* %w
+ %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+ %tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
@@ -123,18 +124,18 @@ entry:
; OPT: load float
define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [4 x i32]
- %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
- %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
- %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
- %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
- %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
- store float 0.0, float* %x
- store float 1.0, float* %y
- store float 2.0, float* %z
- store float 4.0, float* %w
- %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
- %tmp2 = load float, float* %tmp1
+ %tmp = alloca [4 x i32], addrspace(5)
+ %tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*
+ %x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0
+ %y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1
+ %z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2
+ %w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3
+ store float 0.0, float addrspace(5)* %x
+ store float 1.0, float addrspace(5)* %y
+ store float 2.0, float addrspace(5)* %z
+ store float 4.0, float addrspace(5)* %w
+ %tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index
+ %tmp2 = load float, float addrspace(5)* %tmp1
store float %tmp2, float addrspace(1)* %out
ret void
}
@@ -146,17 +147,17 @@ entry:
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
entry:
- %tmp = alloca [4 x i32]
- %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
- %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
- %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
- %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
- store i32 0, i32* %x
- store i32 1, i32* %y
- store i32 2, i32* %z
- store i32 3, i32* %w
- %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
- %tmp2 = load i32, i32* %tmp1
+ %tmp = alloca [4 x i32], addrspace(5)
+ %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+ %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+ %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+ %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+ store i32 0, i32 addrspace(5)* %x
+ store i32 1, i32 addrspace(5)* %y
+ store i32 2, i32 addrspace(5)* %z
+ store i32 3, i32 addrspace(5)* %w
+ %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+ %tmp2 = load i32, i32 addrspace(5)* %tmp1
store i32 %tmp2, i32 addrspace(1)* %out
ret void
}
OpenPOWER on IntegriCloud