diff options
| author | Justin Holewinski <jholewinski@nvidia.com> | 2016-05-02 18:12:02 +0000 |
|---|---|---|
| committer | Justin Holewinski <jholewinski@nvidia.com> | 2016-05-02 18:12:02 +0000 |
| commit | 9a6ea2c2566d32a23db43ed9108d6e4c048faf55 (patch) | |
| tree | 74e8618c55f907148d0ecfa74eac53cbfab18d6f /llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | |
| parent | b2bd28128dafe1cbc9e31f230dcd2ded9fd02168 (diff) | |
| download | bcm5719-llvm-9a6ea2c2566d32a23db43ed9108d6e4c048faf55.tar.gz bcm5719-llvm-9a6ea2c2566d32a23db43ed9108d6e4c048faf55.zip | |
[NVPTX] Fix sign/zero-extending ldg/ldu instruction selection
Summary:
We don't have sign-/zero-extending ldg/ldu instructions defined,
so we need to emulate them with explicit CVTs. We were originally
handling the i8 case, but not any other cases.
Fixes PR26185
Reviewers: jingyue, jlebar
Subscribers: jholewinski
Differential Revision: http://reviews.llvm.org/D19615
llvm-svn: 268272
Diffstat (limited to 'llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 122 |
1 file changed, 74 insertions, 48 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index b1ed2df7a17..32bb279f0e7 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -2062,61 +2062,33 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { // // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64 // - // Since we load an i8 value, the matching logic above will have selected an - // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does - // not expose 8-bit registers): - // - // i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0 - // - // To get the correct type in this case, truncate back to i8 and then extend - // to the original load type. - EVT OrigType = N->getValueType(0); - LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N); - if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) { - unsigned CvtOpc = 0; - - switch (LDSD->getExtensionType()) { - default: - llvm_unreachable("An extension is required for i8 loads"); - break; - case ISD::SEXTLOAD: - switch (OrigType.getSimpleVT().SimpleTy) { - default: - llvm_unreachable("Unhandled integer load type"); - break; - case MVT::i32: - CvtOpc = NVPTX::CVT_s32_s8; - break; - case MVT::i64: - CvtOpc = NVPTX::CVT_s64_s8; - break; - } - break; - case ISD::EXTLOAD: - case ISD::ZEXTLOAD: - switch (OrigType.getSimpleVT().SimpleTy) { - default: - llvm_unreachable("Unhandled integer load type"); - break; - case MVT::i32: - CvtOpc = NVPTX::CVT_u32_u8; - break; - case MVT::i64: - CvtOpc = NVPTX::CVT_u64_u8; - break; - } - break; - } + // In this case, the matching logic above will select a load for the original + // memory type (in this case, i8) and our types will not match (the node needs + // to return an i32 in this case). Our LDG/LDU nodes do not support the + // concept of sign-/zero-extension, so emulate it here by adding an explicit + // CVT instruction. Ptxas should clean up any redundancies here. 
- // For each output value, truncate to i8 (since the upper 8 bits are - // undefined) and then extend to the desired type. + EVT OrigType = N->getValueType(0); + LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N); + + if (OrigType != EltVT && LdNode) { + // We have an extending-load. The instruction we selected operates on the + // smaller type, but the SDNode we are replacing has the larger type. We + // need to emit a CVT to make the types match. + bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD; + unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(), + EltVT.getSimpleVT(), IsSigned); + + // For each output value, apply the manual sign/zero-extension and make sure + // all users of the load go through that CVT. for (unsigned i = 0; i != NumElts; ++i) { SDValue Res(LD, i); SDValue OrigVal(N, i); SDNode *CvtNode = CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res, - CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32)); + CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, + DL, MVT::i32)); ReplaceUses(OrigVal, SDValue(CvtNode, 0)); } } @@ -5199,3 +5171,57 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand( } return true; } + +/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a +/// conversion from \p SrcTy to \p DestTy. +unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy, + bool IsSigned) { + switch (SrcTy.SimpleTy) { + default: + llvm_unreachable("Unhandled source type"); + case MVT::i8: + switch (DestTy.SimpleTy) { + default: + llvm_unreachable("Unhandled dest type"); + case MVT::i16: + return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8; + case MVT::i32: + return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8; + case MVT::i64: + return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8; + } + case MVT::i16: + switch (DestTy.SimpleTy) { + default: + llvm_unreachable("Unhandled dest type"); + case MVT::i8: + return IsSigned ? 
NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16; + case MVT::i32: + return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16; + case MVT::i64: + return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16; + } + case MVT::i32: + switch (DestTy.SimpleTy) { + default: + llvm_unreachable("Unhandled dest type"); + case MVT::i8: + return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32; + case MVT::i16: + return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32; + case MVT::i64: + return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32; + } + case MVT::i64: + switch (DestTy.SimpleTy) { + default: + llvm_unreachable("Unhandled dest type"); + case MVT::i8: + return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64; + case MVT::i16: + return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64; + case MVT::i32: + return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64; + } + } +} |

