summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
diff options
context:
space:
mode:
authorJustin Holewinski <jholewinski@nvidia.com>2016-05-02 18:12:02 +0000
committerJustin Holewinski <jholewinski@nvidia.com>2016-05-02 18:12:02 +0000
commit9a6ea2c2566d32a23db43ed9108d6e4c048faf55 (patch)
tree74e8618c55f907148d0ecfa74eac53cbfab18d6f /llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
parentb2bd28128dafe1cbc9e31f230dcd2ded9fd02168 (diff)
downloadbcm5719-llvm-9a6ea2c2566d32a23db43ed9108d6e4c048faf55.tar.gz
bcm5719-llvm-9a6ea2c2566d32a23db43ed9108d6e4c048faf55.zip
[NVPTX] Fix sign/zero-extending ldg/ldu instruction selection
Summary: We don't have sign-/zero-extending ldg/ldu instructions defined, so we need to emulate them with explicit CVTs. We were originally handling only the i8 case, but not any other cases. Fixes PR26185. Reviewers: jingyue, jlebar; Subscribers: jholewinski; Differential Revision: http://reviews.llvm.org/D19615; llvm-svn: 268272
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp122
1 files changed, 74 insertions, 48 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index b1ed2df7a17..32bb279f0e7 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -2062,61 +2062,33 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
//
// i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
//
- // Since we load an i8 value, the matching logic above will have selected an
- // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
- // not expose 8-bit registers):
- //
- // i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
- //
- // To get the correct type in this case, truncate back to i8 and then extend
- // to the original load type.
- EVT OrigType = N->getValueType(0);
- LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
- if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
- unsigned CvtOpc = 0;
-
- switch (LDSD->getExtensionType()) {
- default:
- llvm_unreachable("An extension is required for i8 loads");
- break;
- case ISD::SEXTLOAD:
- switch (OrigType.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("Unhandled integer load type");
- break;
- case MVT::i32:
- CvtOpc = NVPTX::CVT_s32_s8;
- break;
- case MVT::i64:
- CvtOpc = NVPTX::CVT_s64_s8;
- break;
- }
- break;
- case ISD::EXTLOAD:
- case ISD::ZEXTLOAD:
- switch (OrigType.getSimpleVT().SimpleTy) {
- default:
- llvm_unreachable("Unhandled integer load type");
- break;
- case MVT::i32:
- CvtOpc = NVPTX::CVT_u32_u8;
- break;
- case MVT::i64:
- CvtOpc = NVPTX::CVT_u64_u8;
- break;
- }
- break;
- }
+ // In this case, the matching logic above will select a load for the original
+ // memory type (in this case, i8) and our types will not match (the node needs
+ // to return an i32 in this case). Our LDG/LDU nodes do not support the
+ // concept of sign-/zero-extension, so emulate it here by adding an explicit
+ // CVT instruction. Ptxas should clean up any redundancies here.
- // For each output value, truncate to i8 (since the upper 8 bits are
- // undefined) and then extend to the desired type.
+ EVT OrigType = N->getValueType(0);
+ LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
+
+ if (OrigType != EltVT && LdNode) {
+ // We have an extending-load. The instruction we selected operates on the
+ // smaller type, but the SDNode we are replacing has the larger type. We
+ // need to emit a CVT to make the types match.
+ bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
+ unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
+ EltVT.getSimpleVT(), IsSigned);
+
+ // For each output value, apply the manual sign/zero-extension and make sure
+ // all users of the load go through that CVT.
for (unsigned i = 0; i != NumElts; ++i) {
SDValue Res(LD, i);
SDValue OrigVal(N, i);
SDNode *CvtNode =
CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
- CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
+ CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
+ DL, MVT::i32));
ReplaceUses(OrigVal, SDValue(CvtNode, 0));
}
}
@@ -5199,3 +5171,57 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
}
return true;
}
+
+/// Map an integer-to-integer conversion onto the NVPTX CVT_* machine opcode
+/// that performs it.
+///
+/// Note the argument order: the destination type comes first.
+///
+/// \param DestTy   Type being converted to.
+/// \param SrcTy    Type being converted from.
+/// \param IsSigned Picks the signed (CVT_s*_s*) opcode when true and the
+///                 unsigned (CVT_u*_u*) opcode when false.
+/// \returns the selected NVPTX::CVT_* opcode. Only i8/i16/i32/i64 pairs with
+/// differing source and destination types are listed; every other
+/// combination (including SrcTy == DestTy) reaches llvm_unreachable.
+unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
+                                             bool IsSigned) {
+  // Choose between the signed and unsigned flavour of a single conversion.
+  auto Pick = [IsSigned](unsigned SignedOpc, unsigned UnsignedOpc) {
+    return IsSigned ? SignedOpc : UnsignedOpc;
+  };
+  const auto To = DestTy.SimpleTy;
+
+  // Outer switch: source type. Inner switches: destination type.
+  switch (SrcTy.SimpleTy) {
+  case MVT::i8:
+    switch (To) {
+    case MVT::i16:
+      return Pick(NVPTX::CVT_s16_s8, NVPTX::CVT_u16_u8);
+    case MVT::i32:
+      return Pick(NVPTX::CVT_s32_s8, NVPTX::CVT_u32_u8);
+    case MVT::i64:
+      return Pick(NVPTX::CVT_s64_s8, NVPTX::CVT_u64_u8);
+    default:
+      llvm_unreachable("Unhandled dest type");
+    }
+  case MVT::i16:
+    switch (To) {
+    case MVT::i8:
+      return Pick(NVPTX::CVT_s8_s16, NVPTX::CVT_u8_u16);
+    case MVT::i32:
+      return Pick(NVPTX::CVT_s32_s16, NVPTX::CVT_u32_u16);
+    case MVT::i64:
+      return Pick(NVPTX::CVT_s64_s16, NVPTX::CVT_u64_u16);
+    default:
+      llvm_unreachable("Unhandled dest type");
+    }
+  case MVT::i32:
+    switch (To) {
+    case MVT::i8:
+      return Pick(NVPTX::CVT_s8_s32, NVPTX::CVT_u8_u32);
+    case MVT::i16:
+      return Pick(NVPTX::CVT_s16_s32, NVPTX::CVT_u16_u32);
+    case MVT::i64:
+      return Pick(NVPTX::CVT_s64_s32, NVPTX::CVT_u64_u32);
+    default:
+      llvm_unreachable("Unhandled dest type");
+    }
+  case MVT::i64:
+    switch (To) {
+    case MVT::i8:
+      return Pick(NVPTX::CVT_s8_s64, NVPTX::CVT_u8_u64);
+    case MVT::i16:
+      return Pick(NVPTX::CVT_s16_s64, NVPTX::CVT_u16_u64);
+    case MVT::i32:
+      return Pick(NVPTX::CVT_s32_s64, NVPTX::CVT_u32_u64);
+    default:
+      llvm_unreachable("Unhandled dest type");
+    }
+  default:
+    llvm_unreachable("Unhandled source type");
+  }
+}
OpenPOWER on IntegriCloud