diff options
| -rw-r--r-- | llvm/lib/Support/TargetParser.cpp | 10 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/GCNProcessors.td | 4 | ||||
| -rw-r--r-- | llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll | 15 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/lower-kernargs.ll | 5 |
7 files changed, 35 insertions, 10 deletions
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp index 76401ac0a85..59e096b4b2c 100644 --- a/llvm/lib/Support/TargetParser.cpp +++ b/llvm/lib/Support/TargetParser.cpp @@ -169,12 +169,14 @@ void AMDGPU::fillValidArchListR600(SmallVectorImpl<StringRef> &Values) { } AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) { - if (GPU == "generic") - return {7, 0, 0}; - AMDGPU::GPUKind AK = parseArchAMDGCN(GPU); - if (AK == AMDGPU::GPUKind::GK_NONE) + if (AK == AMDGPU::GPUKind::GK_NONE) { + if (GPU == "generic-hsa") + return {7, 0, 0}; + if (GPU == "generic") + return {6, 0, 0}; return {0, 0, 0}; + } switch (AK) { case GK_GFX600: return {6, 0, 0}; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index cf90426574c..ee67edfae8b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -80,7 +80,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. - FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere @@ -155,7 +155,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, AMDGPUGenSubtargetInfo(TT, GPU, FS), AMDGPUSubtarget(TT), TargetTriple(TT), - Gen(SOUTHERN_ISLANDS), + Gen(TT.getOS() == Triple::AMDHSA ? SEA_ISLANDS : SOUTHERN_ISLANDS), InstrItins(getInstrItineraryForCPU(GPU)), LDSBankCount(0), MaxPrivateElementSize(0), diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 23ba6ce9334..cd7d7147409 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -306,8 +306,9 @@ static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) { if (!GPU.empty()) return GPU; + // Need to default to a target with flat support for HSA. if (TT.getArch() == Triple::amdgcn) - return "generic"; + return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic"; return "r600"; } diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index fbbf1dea64c..021db78d48b 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -12,6 +12,10 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureWavefrontSize64] >; +def : ProcessorModel<"generic-hsa", NoSchedModel, + [FeatureWavefrontSize64, FeatureFlatAddressSpace] +>; + //===------------------------------------------------------------===// // GCN GFX6 (Southern Islands (SI)). //===------------------------------------------------------------===// diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll index cd601c7c929..325960ac9e7 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll @@ -1,7 +1,7 @@ ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s -; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s +; RUN: opt -cost-model -analyze -mtriple=amdgcn-mesa-mesa3d -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s diff --git a/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll b/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll new file mode 100644 index 00000000000..40161d8bb89 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/flat-error-unsupported-gpu-hsa.ll @@ -0,0 +1,15 @@ +; RUN: not llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s +; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; RUN: llc -mtriple=amdgcn-amd-amdhsa -o - %s | FileCheck -check-prefix=HSA-DEFAULT %s +; RUN: not llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx600 -filetype=obj -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; Flat instructions should not select if the target device doesn't +; support them. The default device should be able to select for HSA. + +; ERROR: LLVM ERROR: Cannot select: t{{[0-9]+}}: i32,ch = load<(volatile load 4 from %ir.flat.ptr.load)> +; HSA-DEFAULT: flat_load_dword +define amdgpu_kernel void @load_flat_i32(i32* %flat.ptr) { + %load = load volatile i32, i32* %flat.ptr, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll index 3a489d58b48..d6f84a52c84 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-kernargs.ll @@ -531,7 +531,10 @@ define amdgpu_kernel void @kern_lds_ptr(i32 addrspace(3)* %lds) #0 { define amdgpu_kernel void @kern_lds_ptr_si(i32 addrspace(3)* %lds) #2 { ; HSA-LABEL: @kern_lds_ptr_si( ; HSA-NEXT: [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(8) i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() -; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS:%.*]], align 4 +; HSA-NEXT: [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, i8 addrspace(4)* [[KERN_LDS_PTR_SI_KERNARG_SEGMENT]], i64 0 +; HSA-NEXT: [[LDS_KERNARG_OFFSET_CAST:%.*]] = bitcast i8 addrspace(4)* [[LDS_KERNARG_OFFSET]] to i32 addrspace(3)* addrspace(4)* +; HSA-NEXT: [[LDS_LOAD:%.*]] = load i32 addrspace(3)*, i32 addrspace(3)* addrspace(4)* [[LDS_KERNARG_OFFSET_CAST]], align 16, !invariant.load !0 +; HSA-NEXT: store i32 0, i32 addrspace(3)* [[LDS_LOAD]], align 4 ; HSA-NEXT: ret void ; ; MESA-LABEL: @kern_lds_ptr_si( |

