diff options
| author | Yaxun Liu <Yaxun.Liu@amd.com> | 2017-11-10 01:53:24 +0000 |
|---|---|---|
| committer | Yaxun Liu <Yaxun.Liu@amd.com> | 2017-11-10 01:53:24 +0000 |
| commit | 920cc2f813c67e31fd3d2b2a92b81f037180243b (patch) | |
| tree | 1cfd2e69924368edc86ab621bf278f5637e63316 /llvm | |
| parent | 9278019eb34f787b92b26d44e1b1a1458b29a29b (diff) | |
| download | bcm5719-llvm-920cc2f813c67e31fd3d2b2a92b81f037180243b.tar.gz bcm5719-llvm-920cc2f813c67e31fd3d2b2a92b81f037180243b.zip | |
[AMDGPU] Fix pointer info for pseudo source for r600
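The pointer info for pseudo source values on r600 is incorrect when the
alloca address space is not 0, which causes an invalid SDNode to be created for the r600---amdgiz triple.
This patch fixes that by overriding getAddressSpaceForPseudoSourceKind in R600InstrInfo.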
Differential Revision: https://reviews.llvm.org/D39670
llvm-svn: 317861
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600InstrInfo.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/R600InstrInfo.h | 3 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-constant-i1.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-global-i1.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/load-local-i1.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/vector-alloca.ll | 157 |
6 files changed, 109 insertions, 87 deletions
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp index 15dcf650d9a..9b78bc88ff3 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -1493,3 +1493,21 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand, FlagOp.setImm(InstFlags); } } + +unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind( + PseudoSourceValue::PSVKind Kind) const { + switch (Kind) { + case PseudoSourceValue::Stack: + case PseudoSourceValue::FixedStack: + return AMDGPUASI.PRIVATE_ADDRESS; + case PseudoSourceValue::ConstantPool: + case PseudoSourceValue::GOT: + case PseudoSourceValue::JumpTable: + case PseudoSourceValue::GlobalValueCallEntry: + case PseudoSourceValue::ExternalSymbolCallEntry: + case PseudoSourceValue::TargetCustom: + return AMDGPUASI.CONSTANT_ADDRESS; + } + llvm_unreachable("Invalid pseudo source kind"); + return AMDGPUASI.PRIVATE_ADDRESS; +} diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/llvm/lib/Target/AMDGPU/R600InstrInfo.h index 3b828006807..abaa3745075 100644 --- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h +++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h @@ -318,6 +318,9 @@ public: bool isRegisterLoad(const MachineInstr &MI) const { return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD; } + + unsigned getAddressSpaceForPseudoSourceKind( + PseudoSourceValue::PSVKind Kind) const override; }; namespace AMDGPU { diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll index c8abe5c77ee..789b5e9734d 100644 --- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s 
| FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}constant_load_i1: ; GCN: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll index cb3536a0c12..b35922bac23 100644 --- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}global_load_i1: ; GCN: buffer_load_ubyte diff --git a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll index e8f134b1fb2..089ac371169 100644 --- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll +++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll @@ -1,6 +1,6 @@ -; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga 
-verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s ; FUNC-LABEL: {{^}}local_load_i1: ; GCN: ds_read_u8 diff --git a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll index a0aac8c1d9b..fbdfe48cd38 100644 --- a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll +++ b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll @@ -1,9 +1,10 @@ -; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s -; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca 
-verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s +; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s +target datalayout = "A5" ; OPT-LABEL: @vector_read( ; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index @@ -17,17 +18,17 @@ ; EG: MOVA_INT define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) { entry: - %tmp = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 - store i32 0, i32* %x - store i32 1, i32* %y - store i32 2, i32* %z - store i32 3, i32* %w - %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index - %tmp2 = load i32, i32* %tmp1 + %tmp = alloca [4 x i32], addrspace(5) + %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3 + store i32 0, i32 addrspace(5)* %x + store i32 1, i32 addrspace(5)* %y + store i32 2, i32 addrspace(5)* %z + store i32 3, i32 addrspace(5)* %w + %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 
%index + %tmp2 = load i32, i32 addrspace(5)* %tmp1 store i32 %tmp2, i32 addrspace(1)* %out ret void } @@ -46,19 +47,19 @@ entry: ; EG: MOVA_INT define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { entry: - %tmp = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 - store i32 0, i32* %x - store i32 0, i32* %y - store i32 0, i32* %z - store i32 0, i32* %w - %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index - store i32 1, i32* %tmp1 - %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index - %tmp3 = load i32, i32* %tmp2 + %tmp = alloca [4 x i32], addrspace(5) + %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3 + store i32 0, i32 addrspace(5)* %x + store i32 0, i32 addrspace(5)* %y + store i32 0, i32 addrspace(5)* %z + store i32 0, i32 addrspace(5)* %w + %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index + store i32 1, i32 addrspace(5)* %tmp1 + %tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index + %tmp3 = load i32, i32 addrspace(5)* %tmp2 store i32 %tmp3, i32 addrspace(1)* %out ret void } @@ -73,19 +74,19 @@ entry: ; EG: STORE_RAW define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) { entry: - %tmp = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* 
%tmp, i32 0, i32 3 - store i32 0, i32* %x - store i32 0, i32* %y - store i32 0, i32* %z - store i32 0, i32* %w - %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %tmp2 = bitcast i32* %tmp1 to [4 x i32]* - %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0 - %tmp4 = load i32, i32* %tmp3 + %tmp = alloca [4 x i32], addrspace(5) + %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3 + store i32 0, i32 addrspace(5)* %x + store i32 0, i32 addrspace(5)* %y + store i32 0, i32 addrspace(5)* %z + store i32 0, i32 addrspace(5)* %w + %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)* + %tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0 + %tmp4 = load i32, i32 addrspace(5)* %tmp3 store i32 %tmp4, i32 addrspace(1)* %out ret void } @@ -95,18 +96,18 @@ entry: ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4 define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) { entry: - %tmp = alloca [4 x i32] - %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 - %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 - %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 - %bc = bitcast i32* %x to float* - store float 1.0, float* %bc - store i32 1, i32* %y - store i32 2, i32* %z - store i32 3, i32* %w - %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index - %tmp2 = load i32, i32* %tmp1 + %tmp = alloca [4 x i32], addrspace(5) + %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %y = getelementptr 
inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2 + %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3 + %bc = bitcast i32 addrspace(5)* %x to float addrspace(5)* + store float 1.0, float addrspace(5)* %bc + store i32 1, i32 addrspace(5)* %y + store i32 2, i32 addrspace(5)* %z + store i32 3, i32 addrspace(5)* %w + %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index + %tmp2 = load i32, i32 addrspace(5)* %tmp1 store i32 %tmp2, i32 addrspace(1)* %out ret void } @@ -123,18 +124,18 @@ entry: ; OPT: load float define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) { entry: - %tmp = alloca [4 x i32] - %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]* - %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0 - %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1 - %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2 - %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3 - store float 0.0, float* %x - store float 1.0, float* %y - store float 2.0, float* %z - store float 4.0, float* %w - %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index - %tmp2 = load float, float* %tmp1 + %tmp = alloca [4 x i32], addrspace(5) + %tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)* + %x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0 + %y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1 + %z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2 + %w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3 + store float 0.0, float addrspace(5)* %x + store float 1.0, float addrspace(5)* %y + store float 2.0, float 
addrspace(5)* %z + store float 4.0, float addrspace(5)* %w + %tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index + %tmp2 = load float, float addrspace(5)* %tmp1 store float %tmp2, float addrspace(1)* %out ret void } @@ -146,17 +147,17 @@ entry: ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4 define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) { entry: - %tmp = alloca [4 x i32] - %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0 - %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1 - %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2 - %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3 - store i32 0, i32* %x - store i32 1, i32* %y - store i32 2, i32* %z - store i32 3, i32* %w - %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index - %tmp2 = load i32, i32* %tmp1 + %tmp = alloca [4 x i32], addrspace(5) + %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0 + %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1 + %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2 + %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3 + store i32 0, i32 addrspace(5)* %x + store i32 1, i32 addrspace(5)* %y + store i32 2, i32 addrspace(5)* %z + store i32 3, i32 addrspace(5)* %w + %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index + %tmp2 = load i32, i32 addrspace(5)* %tmp1 store i32 %tmp2, i32 addrspace(1)* %out ret void } |

