-rw-r--r--   llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 122
-rw-r--r--   llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h   |   1
-rw-r--r--   llvm/lib/Target/NVPTX/NVPTXInstrInfo.td     |   2
-rw-r--r--   llvm/test/CodeGen/NVPTX/bug26185-2.ll       |  34
4 files changed, 111 insertions, 48 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b1ed2df7a17..32bb279f0e7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2062,61 +2062,33 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
   //
   //   i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
   //
-  // Since we load an i8 value, the matching logic above will have selected an
-  // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
-  // not expose 8-bit registers):
-  //
-  //   i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
-  //
-  // To get the correct type in this case, truncate back to i8 and then extend
-  // to the original load type.
-  EVT OrigType = N->getValueType(0);
-  LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
-  if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
-    unsigned CvtOpc = 0;
-
-    switch (LDSD->getExtensionType()) {
-    default:
-      llvm_unreachable("An extension is required for i8 loads");
-      break;
-    case ISD::SEXTLOAD:
-      switch (OrigType.getSimpleVT().SimpleTy) {
-      default:
-        llvm_unreachable("Unhandled integer load type");
-        break;
-      case MVT::i32:
-        CvtOpc = NVPTX::CVT_s32_s8;
-        break;
-      case MVT::i64:
-        CvtOpc = NVPTX::CVT_s64_s8;
-        break;
-      }
-      break;
-    case ISD::EXTLOAD:
-    case ISD::ZEXTLOAD:
-      switch (OrigType.getSimpleVT().SimpleTy) {
-      default:
-        llvm_unreachable("Unhandled integer load type");
-        break;
-      case MVT::i32:
-        CvtOpc = NVPTX::CVT_u32_u8;
-        break;
-      case MVT::i64:
-        CvtOpc = NVPTX::CVT_u64_u8;
-        break;
-      }
-      break;
-    }
+  // In this case, the matching logic above will select a load for the original
+  // memory type (in this case, i8) and our types will not match (the node needs
+  // to return an i32 in this case). Our LDG/LDU nodes do not support the
+  // concept of sign-/zero-extension, so emulate it here by adding an explicit
+  // CVT instruction. Ptxas should clean up any redundancies here.
-    // For each output value, truncate to i8 (since the upper 8 bits are
-    // undefined) and then extend to the desired type.
+  EVT OrigType = N->getValueType(0);
+  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
+
+  if (OrigType != EltVT && LdNode) {
+    // We have an extending-load. The instruction we selected operates on the
+    // smaller type, but the SDNode we are replacing has the larger type. We
+    // need to emit a CVT to make the types match.
+    bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
+    unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
+                                       EltVT.getSimpleVT(), IsSigned);
+
+    // For each output value, apply the manual sign/zero-extension and make sure
+    // all users of the load go through that CVT.
     for (unsigned i = 0; i != NumElts; ++i) {
       SDValue Res(LD, i);
       SDValue OrigVal(N, i);
       SDNode *CvtNode =
           CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
-                                  CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
+                                  CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+                                                            DL, MVT::i32));
       ReplaceUses(OrigVal, SDValue(CvtNode, 0));
     }
   }
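
As an aside for readers unfamiliar with PTX's cvt instructions: the CVT node inserted above is simply an explicit sign- or zero-extension of the value that the narrower LDG/LDU produced. The standalone C++ sketch below (illustration only, no LLVM dependency; the helper names cvt_s32_s16 and cvt_u32_u16 are made up to mirror the PTX mnemonics) models the i16-to-i32 case that the new test further down exercises. Because the conversion is a plain extension, ptxas can usually fold it back into an extending load, which is what the "Ptxas should clean up any redundancies here" comment refers to.

// Illustration only, not part of the patch: models the value transformation
// performed by cvt.s32.s16 (SEXTLOAD) and cvt.u32.u16 (ZEXTLOAD/EXTLOAD) when
// the selected LDG/LDU loads 16 bits but the DAG node must produce an i32.
#include <cassert>
#include <cstdint>

static int32_t cvt_s32_s16(uint16_t loaded) {   // sign-extending CVT
  return static_cast<int32_t>(static_cast<int16_t>(loaded));
}

static uint32_t cvt_u32_u16(uint16_t loaded) {  // zero-extending CVT
  return static_cast<uint32_t>(loaded);
}

int main() {
  uint16_t loaded = 0x8000;                // value the 16-bit load produced
  assert(cvt_s32_s16(loaded) == -32768);   // bit 15 propagated into bits 16..31
  assert(cvt_u32_u16(loaded) == 0x8000u);  // upper bits cleared instead
  return 0;
}
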
@@ -5199,3 +5171,57 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
   }
   return true;
 }
+
+/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
+/// conversion from \p SrcTy to \p DestTy.
+unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
+                                             bool IsSigned) {
+  switch (SrcTy.SimpleTy) {
+  default:
+    llvm_unreachable("Unhandled source type");
+  case MVT::i8:
+    switch (DestTy.SimpleTy) {
+    default:
+      llvm_unreachable("Unhandled dest type");
+    case MVT::i16:
+      return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
+    case MVT::i32:
+      return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
+    case MVT::i64:
+      return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
+    }
+  case MVT::i16:
+    switch (DestTy.SimpleTy) {
+    default:
+      llvm_unreachable("Unhandled dest type");
+    case MVT::i8:
+      return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
+    case MVT::i32:
+      return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
+    case MVT::i64:
+      return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
+    }
+  case MVT::i32:
+    switch (DestTy.SimpleTy) {
+    default:
+      llvm_unreachable("Unhandled dest type");
+    case MVT::i8:
+      return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
+    case MVT::i16:
+      return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
+    case MVT::i64:
+      return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
+    }
+  case MVT::i64:
+    switch (DestTy.SimpleTy) {
+    default:
+      llvm_unreachable("Unhandled dest type");
+    case MVT::i8:
+      return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
+    case MVT::i16:
+      return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
+    case MVT::i32:
+      return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
+    }
+  }
+}
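
The new GetConvertOpcode helper is a straight table from (source type, destination type, signedness) to the corresponding CVT_* machine opcode, and the opcode names follow PTX's cvt.<dsttype>.<srctype> mnemonic scheme, where each operand type is 's' or 'u' followed by a bit width. The standalone sketch below (illustration only, not LLVM code; cvtMnemonic is a made-up helper) spells out that naming pattern for two of the entries above.

// Illustration only: the PTX mnemonic pattern behind the NVPTX::CVT_* opcodes.
#include <iostream>
#include <string>

static std::string cvtMnemonic(unsigned DstBits, unsigned SrcBits, bool IsSigned) {
  const std::string T = IsSigned ? "s" : "u";
  return "cvt." + T + std::to_string(DstBits) + "." + T + std::to_string(SrcBits);
}

int main() {
  // Mirrors NVPTX::CVT_s32_s16 and NVPTX::CVT_u64_u8 in the table above.
  std::cout << cvtMnemonic(32, 16, /*IsSigned=*/true) << "\n";   // cvt.s32.s16
  std::cout << cvtMnemonic(64, 8, /*IsSigned=*/false) << "\n";   // cvt.u64.u8
  return 0;
}
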
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index b0fb63f7fe9..d62cc304e3c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -93,6 +93,7 @@ private:
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
+  static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, bool IsSigned);
 };
 } // end namespace llvm
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 50d9ea05545..c158cc6cdab 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -377,6 +377,8 @@ let hasSideEffects = 0 in {
   }
 
   // Generate cvts from all types to all types.
+  defm CVT_s8  : CVT_FROM_ALL<"s8", Int16Regs>;
+  defm CVT_u8  : CVT_FROM_ALL<"u8", Int16Regs>;
   defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
   defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
   defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
new file mode 100644
index 00000000000..55e9dad96c0
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_35 | FileCheck %s
+
+; Verify that we correctly emit code for extending ldg/ldu. We do not expose
+; extending variants in the backend, but the ldg/ldu selection code may pick
+; extending loads as candidates. We do want to support this, so make sure we
+; emit the necessary cvt.* instructions to implement the extension and let ptxas
+; emit the real extending loads.
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; CHECK-LABEL: spam
+define ptx_kernel void @spam(i8 addrspace(1)* noalias nocapture readonly %arg, i8 addrspace(1)* noalias nocapture %arg1, i64 %arg2, i64 %arg3) #0 {
+bb:
+  %tmp = bitcast i8 addrspace(1)* %arg to i16 addrspace(1)*
+  %tmp4 = bitcast i8 addrspace(1)* %arg1 to i64 addrspace(1)*
+  %tmp5 = add nsw i64 %arg3, 8
+  %tmp6 = getelementptr i16, i16 addrspace(1)* %tmp, i64 %tmp5
+; CHECK: ld.global.nc.u16
+  %tmp7 = load i16, i16 addrspace(1)* %tmp6, align 2
+; CHECK: cvt.s32.s16
+  %tmp8 = sext i16 %tmp7 to i64
+  %tmp9 = mul nsw i64 %tmp8, %tmp8
+  %tmp10 = load i64, i64 addrspace(1)* %tmp4, align 8
+  %tmp11 = add nsw i64 %tmp9, %tmp10
+  store i64 %tmp11, i64 addrspace(1)* %tmp4, align 8
+  ret void
+}
+
+attributes #0 = { norecurse nounwind "polly.skip.fn" }
+
+!nvvm.annotations = !{!0}
+
+!0 = !{void (i8 addrspace(1)*, i8 addrspace(1)*, i64, i64)* @spam, !"maxntidx", i64 1, !"maxntidy", i64 1, !"maxntidz", i64 1}
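
For reference, the following host-side C++ model (illustration only; it has no CUDA or LLVM dependency, and the names simply mirror the IR above) spells out the arithmetic @spam performs. The assertion only holds if the 16-bit load is sign-extended before the multiply, which is the behavior the cvt.s32.s16 CHECK line guards against regressing.

// Illustration only: a plain C++ model of what the @spam kernel computes.
#include <cassert>
#include <cstdint>

static void spam(const int16_t *arg, int64_t *arg1, int64_t /*arg2*/, int64_t arg3) {
  int64_t v = arg[arg3 + 8]; // 16-bit load, then sign-extension (ld + cvt in PTX)
  *arg1 += v * v;            // square, add to *arg1, store back
}

int main() {
  int16_t in[16] = {};
  in[10] = -3;               // element at index arg3 + 8 with arg3 == 2
  int64_t out = 5;
  spam(in, &out, /*arg2=*/0, /*arg3=*/2);
  assert(out == 5 + 9);      // (-3) * (-3) == 9 requires sign extension
  return 0;
}
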