summary refs log tree commit diff stats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp 24
1 files changed, 21 insertions, 3 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6f8672fcb2a..6cbf0604d7e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -33,11 +33,29 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
} else if (RC == &NVPTX::Int64RegsRegClass) {
- return ".s64";
+ // We use untyped (.b) integer registers here, as NVCC does.
+ // Correctness of the generated code does not depend on register type,
+ // but using .s/.u registers runs into a ptxas bug that prevents
+ // assembly of otherwise valid PTX into SASS. Although the PTX ISA
+ // specifies only the argument size for fp16 instructions, ptxas does
+ // not allow using .s16 or .u16 arguments for .fp16
+ // instructions. At the same time, it allows using .s32/.u32
+ // arguments for .fp16v2 instructions:
+ //
+ // .reg .b16 rb16
+ // .reg .s16 rs16
+ // add.f16 rb16,rb16,rb16; // OK
+ // add.f16 rs16,rs16,rs16; // Arguments mismatch for instruction 'add'
+ // but:
+ // .reg .b32 rb32
+ // .reg .s32 rs32
+ // add.f16v2 rb32,rb32,rb32; // OK
+ // add.f16v2 rs32,rs32,rs32; // OK
+ return ".b64";
} else if (RC == &NVPTX::Int32RegsRegClass) {
- return ".s32";
+ return ".b32";
} else if (RC == &NVPTX::Int16RegsRegClass) {
- return ".s16";
+ return ".b16";
} else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
} else if (RC == &NVPTX::SpecialRegsRegClass) {
OpenPOWER on IntegriCloud