summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Artem Belevich <tra@google.com> 2016-08-12 22:02:19 +0000
committer: Artem Belevich <tra@google.com> 2016-08-12 22:02:19 +0000
commit: 2f0a3dfe64ab1f313320a380e5a4cd3aa11d5bc7 (patch)
tree: e0d89f34f0ad12cebddc7e3ce4c0731994a2a476 /llvm/lib
parent: 0bc85613f7cd157e499a9c849321973ed7e07216 (diff)
download: bcm5719-llvm-2f0a3dfe64ab1f313320a380e5a4cd3aa11d5bc7.tar.gz
bcm5719-llvm-2f0a3dfe64ab1f313320a380e5a4cd3aa11d5bc7.zip
[NVPTX] Use untyped (.b) integer registers in PTX.
This brings LLVM-generated PTX closer to what nvcc generates and avoids triggering issues in ptxas. For instance, ptxas does not accept .s16 (or .u16) registers as operands for .fp16 instructions. Differential Revision: https://reviews.llvm.org/D23460 llvm-svn: 278568
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp 24
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6f8672fcb2a..6cbf0604d7e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -33,11 +33,29 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
} else if (RC == &NVPTX::Int64RegsRegClass) {
- return ".s64";
+ // We use untyped (.b) integer registers here as NVCC does.
+ // Correctness of generated code does not depend on register type,
+ // but using .s/.u registers runs into a ptxas bug that prevents
+ // assembly of otherwise valid PTX into SASS. Despite PTX ISA
+ // specifying only argument size for fp16 instructions, ptxas does
+ // not allow using .s16 or .u16 arguments for .fp16
+ // instructions. At the same time it allows using .s32/.u32
+ // arguments for .fp16v2 instructions:
+ //
+ // .reg .b16 rb16
+ // .reg .s16 rs16
+ // add.f16 rb16,rb16,rb16; // OK
+ // add.f16 rs16,rs16,rs16; // Arguments mismatch for instruction 'add'
+ // but:
+ // .reg .b32 rb32
+ // .reg .s32 rs32
+ // add.f16v2 rb32,rb32,rb32; // OK
+ // add.f16v2 rs32,rs32,rs32; // OK
+ return ".b64";
} else if (RC == &NVPTX::Int32RegsRegClass) {
- return ".s32";
+ return ".b32";
} else if (RC == &NVPTX::Int16RegsRegClass) {
- return ".s16";
+ return ".b16";
} else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
} else if (RC == &NVPTX::SpecialRegsRegClass) {
OpenPOWER on IntegriCloud