diff options
| author | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 18:57:27 +0000 | 
|---|---|---|
| committer | Justin Holewinski <justin.holewinski@gmail.com> | 2011-09-26 18:57:27 +0000 | 
| commit | b40da7f9563763bbd40b08d2a638a46862c83afd (patch) | |
| tree | cca89d8a29b2deb954d6c83b9add4cd53510f2ef /llvm/lib/Target | |
| parent | c3edaddfea75ed6aeef3a57f05c8c0a2439c1793 (diff) | |
| download | bcm5719-llvm-b40da7f9563763bbd40b08d2a638a46862c83afd.tar.gz bcm5719-llvm-b40da7f9563763bbd40b08d2a638a46862c83afd.zip | |
PTX: Implement PTXSelectionDAGInfo
llvm-svn: 140549
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/PTX/CMakeLists.txt | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp | 148 | ||||
| -rw-r--r-- | llvm/lib/Target/PTX/PTXSelectionDAGInfo.h | 53 | ||||
| -rw-r--r-- | llvm/lib/Target/PTX/PTXTargetMachine.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/PTX/PTXTargetMachine.h | 16 | 
5 files changed, 214 insertions, 5 deletions
| diff --git a/llvm/lib/Target/PTX/CMakeLists.txt b/llvm/lib/Target/PTX/CMakeLists.txt index abf6dcd0311..f711af93a46 100644 --- a/llvm/lib/Target/PTX/CMakeLists.txt +++ b/llvm/lib/Target/PTX/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_target(PTXCodeGen    PTXParamManager.cpp    PTXRegAlloc.cpp    PTXRegisterInfo.cpp +  PTXSelectionDAGInfo.cpp    PTXSubtarget.cpp    PTXTargetMachine.cpp    ) diff --git a/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp new file mode 100644 index 00000000000..e333183fc95 --- /dev/null +++ b/llvm/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -0,0 +1,148 @@ +//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PTXSelectionDAGInfo class. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "ptx-selectiondag-info" +#include "PTXTargetMachine.h" +#include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/SelectionDAG.h" +using namespace llvm; + +PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM) +  : TargetSelectionDAGInfo(TM), +    Subtarget(&TM.getSubtarget<PTXSubtarget>()) { +} + +PTXSelectionDAGInfo::~PTXSelectionDAGInfo() { +} + +SDValue +PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, +                                             SDValue Chain, +                                             SDValue Dst, SDValue Src, +                                             SDValue Size, unsigned Align, +                                             bool isVolatile, bool AlwaysInline, +                                             MachinePointerInfo DstPtrInfo, +                                          MachinePointerInfo SrcPtrInfo) const { +  // Do repeated 4-byte loads and stores. To be improved. +  // This requires 4-byte alignment. +  if ((Align & 3) != 0) +    return SDValue(); +  // This requires the copy size to be a constant, preferably +  // within a subtarget-specific limit. +  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); +  if (!ConstantSize) +    return SDValue(); +  uint64_t SizeVal = ConstantSize->getZExtValue(); +  // Always inline memcpys. In PTX, we do not have a C library that provides +  // a memcpy function. +  //if (!AlwaysInline) +  //  return SDValue(); + +  unsigned BytesLeft = SizeVal & 3; +  unsigned NumMemOps = SizeVal >> 2; +  unsigned EmittedNumMemOps = 0; +  EVT VT = MVT::i32; +  unsigned VTSize = 4; +  unsigned i = 0; +  const unsigned MAX_LOADS_IN_LDM = 6; +  SDValue TFOps[MAX_LOADS_IN_LDM]; +  SDValue Loads[MAX_LOADS_IN_LDM]; +  uint64_t SrcOff = 0, DstOff = 0; + +  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the +  // same number of stores.  The loads and stores will get combined into +  // ldm/stm later on. +  while (EmittedNumMemOps < NumMemOps) { +    for (i = 0; +         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { +      Loads[i] = DAG.getLoad(VT, dl, Chain, +                             DAG.getNode(ISD::ADD, dl, MVT::i32, Src, +                                         DAG.getConstant(SrcOff, MVT::i32)), +                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile, +                             false, 0); +      TFOps[i] = Loads[i].getValue(1); +      SrcOff += VTSize; +    } +    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + +    for (i = 0; +         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) { +      TFOps[i] = DAG.getStore(Chain, dl, Loads[i], +                              DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, +                                          DAG.getConstant(DstOff, MVT::i32)), +                              DstPtrInfo.getWithOffset(DstOff), +                              isVolatile, false, 0); +      DstOff += VTSize; +    } +    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + +    EmittedNumMemOps += i; +  } + +  if (BytesLeft == 0) +    return Chain; + +  // Issue loads / stores for the trailing (1 - 3) bytes. +  unsigned BytesLeftSave = BytesLeft; +  i = 0; +  while (BytesLeft) { +    if (BytesLeft >= 2) { +      VT = MVT::i16; +      VTSize = 2; +    } else { +      VT = MVT::i8; +      VTSize = 1; +    } + +    Loads[i] = DAG.getLoad(VT, dl, Chain, +                           DAG.getNode(ISD::ADD, dl, MVT::i32, Src, +                                       DAG.getConstant(SrcOff, MVT::i32)), +                           SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); +    TFOps[i] = Loads[i].getValue(1); +    ++i; +    SrcOff += VTSize; +    BytesLeft -= VTSize; +  } +  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); + +  i = 0; +  BytesLeft = BytesLeftSave; +  while (BytesLeft) { +    if (BytesLeft >= 2) { +      VT = MVT::i16; +      VTSize = 2; +    } else { +      VT = MVT::i8; +      VTSize = 1; +    } + +    TFOps[i] = DAG.getStore(Chain, dl, Loads[i], +                            DAG.getNode(ISD::ADD, dl, MVT::i32, Dst, +                                        DAG.getConstant(DstOff, MVT::i32)), +                            DstPtrInfo.getWithOffset(DstOff), false, false, 0); +    ++i; +    DstOff += VTSize; +    BytesLeft -= VTSize; +  } +  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i); +} + +SDValue PTXSelectionDAGInfo:: +EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, +                        SDValue Chain, SDValue Dst, +                        SDValue Src, SDValue Size, +                        unsigned Align, bool isVolatile, +                        MachinePointerInfo DstPtrInfo) const { +  llvm_unreachable("memset lowering not implemented for PTX yet"); +} + diff --git a/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h b/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h new file mode 100644 index 00000000000..e0c716718f0 --- /dev/null +++ b/llvm/lib/Target/PTX/PTXSelectionDAGInfo.h @@ -0,0 +1,53 @@ +//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the PTX subclass for TargetSelectionDAGInfo. +// +//===----------------------------------------------------------------------===// + +#ifndef PTXSELECTIONDAGINFO_H +#define PTXSELECTIONDAGINFO_H + +#include "llvm/Target/TargetSelectionDAGInfo.h" + +namespace llvm { + +/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target. +/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo. +class PTXSelectionDAGInfo : public TargetSelectionDAGInfo { +  /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can +  /// make the right decision when generating code for different targets. +  const PTXSubtarget *Subtarget; + +public: +  explicit PTXSelectionDAGInfo(const TargetMachine &TM); +  ~PTXSelectionDAGInfo(); + +  virtual +  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, +                                  SDValue Chain, +                                  SDValue Dst, SDValue Src, +                                  SDValue Size, unsigned Align, +                                  bool isVolatile, bool AlwaysInline, +                                  MachinePointerInfo DstPtrInfo, +                                  MachinePointerInfo SrcPtrInfo) const; + +  virtual +  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, +                                  SDValue Chain, +                                  SDValue Op1, SDValue Op2, +                                  SDValue Op3, unsigned Align, +                                  bool isVolatile, +                                  MachinePointerInfo DstPtrInfo) const; +}; + +} + +#endif + diff --git a/llvm/lib/Target/PTX/PTXTargetMachine.cpp b/llvm/lib/Target/PTX/PTXTargetMachine.cpp index a6db0b7f2e6..1f3f1721a65 100644 --- a/llvm/lib/Target/PTX/PTXTargetMachine.cpp +++ b/llvm/lib/Target/PTX/PTXTargetMachine.cpp @@ -94,6 +94,7 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,      Subtarget(TT, CPU, FS, is64Bit),      FrameLowering(Subtarget),      InstrInfo(*this), +    TSInfo(*this),      TLInfo(*this) {  } diff --git a/llvm/lib/Target/PTX/PTXTargetMachine.h b/llvm/lib/Target/PTX/PTXTargetMachine.h index d5726b9866b..5b7c82b1f4f 100644 --- a/llvm/lib/Target/PTX/PTXTargetMachine.h +++ b/llvm/lib/Target/PTX/PTXTargetMachine.h @@ -17,6 +17,7 @@  #include "PTXISelLowering.h"  #include "PTXInstrInfo.h"  #include "PTXFrameLowering.h" +#include "PTXSelectionDAGInfo.h"  #include "PTXSubtarget.h"  #include "llvm/Target/TargetData.h"  #include "llvm/Target/TargetFrameLowering.h" @@ -25,11 +26,12 @@  namespace llvm {  class PTXTargetMachine : public LLVMTargetMachine {    private: -    const TargetData  DataLayout; -    PTXSubtarget      Subtarget; // has to be initialized before FrameLowering -    PTXFrameLowering  FrameLowering; -    PTXInstrInfo      InstrInfo; -    PTXTargetLowering TLInfo; +    const TargetData    DataLayout; +    PTXSubtarget        Subtarget; // has to be initialized before FrameLowering +    PTXFrameLowering    FrameLowering; +    PTXInstrInfo        InstrInfo; +    PTXSelectionDAGInfo TSInfo; +    PTXTargetLowering   TLInfo;    public:      PTXTargetMachine(const Target &T, StringRef TT, @@ -50,6 +52,10 @@ class PTXTargetMachine : public LLVMTargetMachine {      virtual const PTXTargetLowering *getTargetLowering() const {        return &TLInfo; } +    virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const { +      return &TSInfo; +    } +      virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }      virtual bool addInstSelector(PassManagerBase &PM, | 

