summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp   | 52
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   | 18
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp | 24
3 files changed, 15 insertions, 79 deletions
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index c3b6f7b034b..61c6758ef11 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -662,58 +662,6 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
case ADDRESS_SPACE_PARAM:
- if (Src.getOpcode() == NVPTXISD::MoveParam) {
- // TL;DR: addrspacecast MoveParam to param space is a no-op.
- //
- // Longer version is that this particular change is both an
- // optimization and a work-around for a problem in ptxas for
- // sm_50+.
- //
- // Background:
- // According to PTX ISA v7.5 5.1.6.4: "...whenever a formal
- // parameter has its address taken within the called function
- // [..] the parameter will be copied to the stack if
- // necessary, and so the address will be in the .local state
- // space and is accessed via ld.local and st.local
- // instructions."
- //
- // Bug part: 'copied to the stack if necessary' part is
- // currently broken for byval arguments with alignment < 4 for
- // sm_50+ in all public versions of CUDA. Taking the address of
- // such an argument results in SASS code that attempts to store
- // the value using a 32-bit write to an unaligned address.
- //
- // Optimization: On top of the overhead of spill-to-stack, we
- // also convert that address from local to generic space,
- // which may result in further overhead. All of it is in most
- // cases unnecessary, as we only need the value of the
- // variable, and it can be accessed directly by using the
- // argument symbol. We'll use the address of the local copy if
- // it's needed (see step (a) below).
- //
- // In order for this bit to do its job we need to:
- //
- // a) Let LLVM do the spilling and make sure the IR does not
- // access byval arguments directly. Instead, the argument
- // pointer (which is in the default address space) is
- // addrspacecast'ed to param space, and the data it points to
- // is copied to allocated local space (i.e. we let compiler
- // spill it, which gives us an opportunity to optimize the
- // spill away later if nobody needs its address). Then all
- // uses of arg pointer are replaced with a pointer to local
- // copy of the arg. All this is done in NVPTXLowerKernelArgs.
- //
- // b) LowerFormalArguments() lowers the argument to
- // NVPTXISD::MoveParam without any space conversion.
- //
- // c) And the final step is done by the code below.
- // It replaces the addrspacecast (MoveParam) from step (a)
- // with the arg symbol itself. This can then be used for
- // [symbol + offset] addressing.
-
- ReplaceNode(N, Src.getOperand(0).getNode());
- return;
- }
Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
: NVPTX::nvvm_ptr_gen_to_param;
break;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 7d457aff46c..e81f824b2f3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2078,6 +2078,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
+ bool isKernel = llvm::isKernelFunction(*F);
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
@@ -2111,8 +2112,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
theArgs[i],
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
: nullptr))) {
- assert(llvm::isKernelFunction(*F) &&
- "Only kernels can have image/sampler params");
+ assert(isKernel && "Only kernels can have image/sampler params");
InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
continue;
}
@@ -2334,11 +2334,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// machine instruction fails because TargetExternalSymbol
// (not lowered) is target dependent, and CopyToReg assumes
// the source is lowered.
- //
- // Byval arguments for regular function are lowered the same way
- // as for kernels in order to generate better code and work around
- // a known issue in ptxas. See comments in
- // NVPTXDAGToDAGISel::SelectAddrSpaceCast() for the gory details.
EVT ObjectVT = getValueType(DL, Ty);
assert(ObjectVT == Ins[InsIdx].VT &&
"Ins type did not match function type");
@@ -2346,7 +2341,14 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
- InVals.push_back(p);
+ if (isKernel)
+ InVals.push_back(p);
+ else {
+ SDValue p2 = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
+ DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
+ InVals.push_back(p2);
+ }
}
// Clang will check explicit VarArg and issue error if any. However, Clang
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
index a4769b319a0..d162a283f74 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -101,11 +101,6 @@ namespace {
class NVPTXLowerKernelArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
- // Kernels and regular device functions treat byval arguments
- // differently.
- bool runOnKernelFunction(Function &F);
- bool runOnDeviceFunction(Function &F);
-
// handle byval parameters
void handleByValParam(Argument *Arg);
// Knowing Ptr must point to the global address space, this function
@@ -197,7 +192,11 @@ void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+
if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
// Mark pointers in byval structs as global.
for (auto &B : F) {
@@ -229,19 +228,6 @@ bool NVPTXLowerKernelArgs::runOnKernelFunction(Function &F) {
return true;
}
-// See comments in NVPTXDAGToDAGISel::SelectAddrSpaceCast() for the
-// details on why we need to spill byval arguments into local memory.
-bool NVPTXLowerKernelArgs::runOnDeviceFunction(Function &F) {
- for (Argument &Arg : F.args())
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
- handleByValParam(&Arg);
- return true;
-}
-
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
- return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
-}
-
FunctionPass *
llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
return new NVPTXLowerKernelArgs(TM);
OpenPOWER on IntegriCloud