diff options
| field | value | date |
|---|---|---|
| author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-11-28 17:51:08 +0000 |
| committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-11-28 17:51:08 +0000 |
| commit | 06ae4ec78ed189a06a0e376fb57abfeff3f7aad6 (patch) | |
| tree | c98b1ddc6726af4032e4ede803994dd06df539fc | |
| parent | 501129895854ee59d6b9e50e87c9644af2fc547d (diff) | |
| download | bcm5719-llvm-06ae4ec78ed189a06a0e376fb57abfeff3f7aad6.tar.gz bcm5719-llvm-06ae4ec78ed189a06a0e376fb57abfeff3f7aad6.zip | |
AMDGPU: Add num spilled s/vgprs to metadata
This was requested by tools.
Differential Revision: https://reviews.llvm.org/D40321
llvm-svn: 319192
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/docs/AMDGPUUsage.rst | 10 |
| -rw-r--r-- | llvm/include/llvm/Support/AMDGPUMetadata.h | 8 |
| -rw-r--r-- | llvm/lib/Support/AMDGPUMetadata.cpp | 4 |
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 |
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll | 142 |
| -rw-r--r-- | llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s | 4 |
6 files changed, 153 insertions, 17 deletions
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index 1cf30304dfc..ecb0c11dbcb 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -1295,6 +1295,16 @@ non-AMD key names should be prefixed by "*vendor-name*.".                                                             code is capable of                                                             supporting XNACK. See                                                             :ref:`amdgpu-target-features`. +     "NumSpilledSGPRs"            integer                  Number of stores from +                                                           a scalar register to +                                                           a register allocator +                                                           created spill +                                                           location. +     "NumSpilledVGPRs"            integer                  Number of stores from +                                                           a vector register to +                                                           a register allocator +                                                           created spill +                                                           location.       ============================ ============== ========= =====================  .. diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h index 0c8d0228773..00039a75c51 100644 --- a/llvm/include/llvm/Support/AMDGPUMetadata.h +++ b/llvm/include/llvm/Support/AMDGPUMetadata.h @@ -244,6 +244,10 @@ constexpr char MaxFlatWorkGroupSize[] = "MaxFlatWorkGroupSize";  constexpr char IsDynamicCallStack[] = "IsDynamicCallStack";  /// \brief Key for Kernel::CodeProps::Metadata::mIsXNACKEnabled.  constexpr char IsXNACKEnabled[] = "IsXNACKEnabled"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledSGPRs. 
+constexpr char NumSpilledSGPRs[] = "NumSpilledSGPRs"; +/// \brief Key for Kernel::CodeProps::Metadata::mNumSpilledVGPRs. +constexpr char NumSpilledVGPRs[] = "NumSpilledVGPRs";  } // end namespace Key  /// \brief In-memory representation of kernel code properties metadata. @@ -275,6 +279,10 @@ struct Metadata final {    /// \brief True if the generated machine code is capable of supporting XNACK.    /// Optional.    bool mIsXNACKEnabled = false; +  /// \brief Number of SGPRs spilled by a wavefront. Optional. +  uint16_t mNumSpilledSGPRs = 0; +  /// \brief Number of VGPRs spilled by a workitem. Optional. +  uint16_t mNumSpilledVGPRs = 0;    /// \brief Default constructor.    Metadata() = default; diff --git a/llvm/lib/Support/AMDGPUMetadata.cpp b/llvm/lib/Support/AMDGPUMetadata.cpp index ec2714cfc1c..ddb25935e0e 100644 --- a/llvm/lib/Support/AMDGPUMetadata.cpp +++ b/llvm/lib/Support/AMDGPUMetadata.cpp @@ -148,6 +148,10 @@ struct MappingTraits<Kernel::CodeProps::Metadata> {                      MD.mIsDynamicCallStack, false);      YIO.mapOptional(Kernel::CodeProps::Key::IsXNACKEnabled,                      MD.mIsXNACKEnabled, false); +    YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledSGPRs, +                    MD.mNumSpilledSGPRs, uint16_t(0)); +    YIO.mapOptional(Kernel::CodeProps::Key::NumSpilledVGPRs, +                    MD.mNumSpilledVGPRs, uint16_t(0));    }  }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 563ca0d236a..9c87ecada77 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1188,6 +1188,8 @@ AMDGPU::HSAMD::Kernel::CodeProps::Metadata AMDGPUAsmPrinter::getHSACodeProps(    HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();    HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;    HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled(); +  HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs(); +  
HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();    return HSACodeProps;  } diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll index 2d02b46e479..f4a914adddb 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll @@ -1,26 +1,26 @@  ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s -; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx800 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX800 --check-prefix=NOTES %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s  ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s +@var = addrspace(1) global float 0.0 +  ; CHECK: ---  ; CHECK:  Version: [ 1, 0 ] -  ; CHECK:  Kernels: -; CHECK:    - Name:       test -; CHECK:      SymbolName: 'test@kd' -; CHECK:      CodeProps: -; CHECK:        KernargSegmentSize:      24 -; CHECK:        GroupSegmentFixedSize:   0 -; CHECK:        PrivateSegmentFixedSize: 0 -; CHECK:        KernargSegmentAlign:     8 -; CHECK:        WavefrontSize:           64 -; GFX700:       NumSGPRs:                6 -; GFX800:       NumSGPRs:                96 -; GFX900:       NumSGPRs:                6 -; GFX700:       NumVGPRs:                4 -; GFX800:       NumVGPRs:                6 -; GFX900:       NumVGPRs:                6 -; CHECK:        MaxFlatWorkGroupSize:    256 + +; CHECK: - Name:       test +; CHECK:   SymbolName: 'test@kd' 
+; CHECK:   CodeProps: +; CHECK:     KernargSegmentSize:      24 +; CHECK:     GroupSegmentFixedSize:   0 +; CHECK:     PrivateSegmentFixedSize: 0 +; CHECK:     KernargSegmentAlign:     8 +; CHECK:     WavefrontSize:           64 +; CHECK:     NumSGPRs:                6 +; GFX700:    NumVGPRs:                4 +; GFX803:    NumVGPRs:                6 +; GFX900:    NumVGPRs:                6 +; CHECK:     MaxFlatWorkGroupSize:    256  define amdgpu_kernel void @test(      half addrspace(1)* %r,      half addrspace(1)* %a, @@ -32,3 +32,111 @@ entry:    store half %r.val, half addrspace(1)* %r    ret void  } + +; CHECK: - Name:       num_spilled_sgprs +; CHECK:   SymbolName: 'num_spilled_sgprs@kd' +; CHECK:   CodeProps: +; CHECK:     NumSpilledSGPRs: 41 +define amdgpu_kernel void @num_spilled_sgprs( +    i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %out2, +    i32 addrspace(1)* %out3, i32 addrspace(1)* %out4, i32 addrspace(1)* %out5, +    i32 addrspace(1)* %out6, i32 addrspace(1)* %out7, i32 addrspace(1)* %out8, +    i32 addrspace(1)* %out9, i32 addrspace(1)* %outa, i32 addrspace(1)* %outb, +    i32 addrspace(1)* %outc, i32 addrspace(1)* %outd, i32 addrspace(1)* %oute, +    i32 addrspace(1)* %outf, i32 %in0, i32 %in1, i32 %in2, i32 %in3, i32 %in4, +    i32 %in5, i32 %in6, i32 %in7, i32 %in8, i32 %in9, i32 %ina, i32 %inb, +    i32 %inc, i32 %ind, i32 %ine, i32 %inf) #0 { +entry: +  store i32 %in0, i32 addrspace(1)* %out0 +  store i32 %in1, i32 addrspace(1)* %out1 +  store i32 %in2, i32 addrspace(1)* %out2 +  store i32 %in3, i32 addrspace(1)* %out3 +  store i32 %in4, i32 addrspace(1)* %out4 +  store i32 %in5, i32 addrspace(1)* %out5 +  store i32 %in6, i32 addrspace(1)* %out6 +  store i32 %in7, i32 addrspace(1)* %out7 +  store i32 %in8, i32 addrspace(1)* %out8 +  store i32 %in9, i32 addrspace(1)* %out9 +  store i32 %ina, i32 addrspace(1)* %outa +  store i32 %inb, i32 addrspace(1)* %outb +  store i32 %inc, i32 addrspace(1)* %outc +  store i32 %ind, 
i32 addrspace(1)* %outd +  store i32 %ine, i32 addrspace(1)* %oute +  store i32 %inf, i32 addrspace(1)* %outf +  ret void +} + +; CHECK: - Name:       num_spilled_vgprs +; CHECK:   SymbolName: 'num_spilled_vgprs@kd' +; CHECK:   CodeProps: +; CHECK:     NumSpilledVGPRs: 14 +define amdgpu_kernel void @num_spilled_vgprs() #1 { +  %val0 = load volatile float, float addrspace(1)* @var +  %val1 = load volatile float, float addrspace(1)* @var +  %val2 = load volatile float, float addrspace(1)* @var +  %val3 = load volatile float, float addrspace(1)* @var +  %val4 = load volatile float, float addrspace(1)* @var +  %val5 = load volatile float, float addrspace(1)* @var +  %val6 = load volatile float, float addrspace(1)* @var +  %val7 = load volatile float, float addrspace(1)* @var +  %val8 = load volatile float, float addrspace(1)* @var +  %val9 = load volatile float, float addrspace(1)* @var +  %val10 = load volatile float, float addrspace(1)* @var +  %val11 = load volatile float, float addrspace(1)* @var +  %val12 = load volatile float, float addrspace(1)* @var +  %val13 = load volatile float, float addrspace(1)* @var +  %val14 = load volatile float, float addrspace(1)* @var +  %val15 = load volatile float, float addrspace(1)* @var +  %val16 = load volatile float, float addrspace(1)* @var +  %val17 = load volatile float, float addrspace(1)* @var +  %val18 = load volatile float, float addrspace(1)* @var +  %val19 = load volatile float, float addrspace(1)* @var +  %val20 = load volatile float, float addrspace(1)* @var +  %val21 = load volatile float, float addrspace(1)* @var +  %val22 = load volatile float, float addrspace(1)* @var +  %val23 = load volatile float, float addrspace(1)* @var +  %val24 = load volatile float, float addrspace(1)* @var +  %val25 = load volatile float, float addrspace(1)* @var +  %val26 = load volatile float, float addrspace(1)* @var +  %val27 = load volatile float, float addrspace(1)* @var +  %val28 = load volatile float, float addrspace(1)* @var + 
 %val29 = load volatile float, float addrspace(1)* @var +  %val30 = load volatile float, float addrspace(1)* @var + +  store volatile float %val0, float addrspace(1)* @var +  store volatile float %val1, float addrspace(1)* @var +  store volatile float %val2, float addrspace(1)* @var +  store volatile float %val3, float addrspace(1)* @var +  store volatile float %val4, float addrspace(1)* @var +  store volatile float %val5, float addrspace(1)* @var +  store volatile float %val6, float addrspace(1)* @var +  store volatile float %val7, float addrspace(1)* @var +  store volatile float %val8, float addrspace(1)* @var +  store volatile float %val9, float addrspace(1)* @var +  store volatile float %val10, float addrspace(1)* @var +  store volatile float %val11, float addrspace(1)* @var +  store volatile float %val12, float addrspace(1)* @var +  store volatile float %val13, float addrspace(1)* @var +  store volatile float %val14, float addrspace(1)* @var +  store volatile float %val15, float addrspace(1)* @var +  store volatile float %val16, float addrspace(1)* @var +  store volatile float %val17, float addrspace(1)* @var +  store volatile float %val18, float addrspace(1)* @var +  store volatile float %val19, float addrspace(1)* @var +  store volatile float %val20, float addrspace(1)* @var +  store volatile float %val21, float addrspace(1)* @var +  store volatile float %val22, float addrspace(1)* @var +  store volatile float %val23, float addrspace(1)* @var +  store volatile float %val24, float addrspace(1)* @var +  store volatile float %val25, float addrspace(1)* @var +  store volatile float %val26, float addrspace(1)* @var +  store volatile float %val27, float addrspace(1)* @var +  store volatile float %val28, float addrspace(1)* @var +  store volatile float %val29, float addrspace(1)* @var +  store volatile float %val30, float addrspace(1)* @var + +  ret void +} + +attributes #0 = { "amdgpu-num-sgpr"="14" } +attributes #1 = { "amdgpu-num-vgpr"="20" } diff --git 
a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s index 54c4b4a01e2..0b0404295cf 100644 --- a/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s +++ b/llvm/test/MC/AMDGPU/hsa-metadata-kernel-code-props.s @@ -14,6 +14,8 @@  // CHECK:          KernargSegmentAlign:     16  // CHECK:          WavefrontSize:           64  // CHECK:          MaxFlatWorkGroupSize:    256 +// CHECK:          NumSpilledSGPRs: 1 +// CHECK:          NumSpilledVGPRs: 1  .amd_amdgpu_hsa_metadata    Version: [ 1, 0 ]    Printf: [ '1:1:4:%d\n', '2:1:8:%g\n' ] @@ -27,4 +29,6 @@          KernargSegmentAlign:     16          WavefrontSize:           64          MaxFlatWorkGroupSize:    256 +        NumSpilledSGPRs:         1 +        NumSpilledVGPRs:         1  .end_amd_amdgpu_hsa_metadata  | 

