Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 111
 llvm/lib/Target/R600/AMDGPUISelLowering.h   |   6
 llvm/lib/Target/R600/AMDGPUInstrInfo.td     |   4
 llvm/lib/Target/R600/AMDGPUSubtarget.h      |   9
 llvm/lib/Target/R600/AMDILISelLowering.cpp  |  35
 llvm/lib/Target/R600/AMDILIntrinsics.td     |   4
 llvm/lib/Target/R600/R600ISelLowering.cpp   |   5
 llvm/lib/Target/R600/R600Instructions.td    |  17
 llvm/lib/Target/R600/SIInstructions.td      |  10
9 files changed, 154 insertions, 47 deletions
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
index 4e4b12eacc9..ddf251f38bf 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -211,6 +211,20 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
     setOperationAction(ISD::FSUB, VT, Expand);
     setOperationAction(ISD::SELECT, VT, Expand);
   }
+
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
+
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
+
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
+
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
 }
 
 //===----------------------------------------------------------------------===//
@@ -927,6 +941,101 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
 }
 
+SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op,
+                                                      unsigned BitsDiff,
+                                                      SelectionDAG &DAG) const {
+  MVT VT = Op.getSimpleValueType();
+  SDLoc DL(Op);
+  SDValue Shift = DAG.getConstant(BitsDiff, VT);
+  // Shift left by 'Shift' bits.
+  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Op.getOperand(0), Shift);
+  // Signed shift Right by 'Shift' bits.
+  return DAG.getNode(ISD::SRA, DL, VT, Shl, Shift);
+}
+
+SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+  MVT VT = Op.getSimpleValueType();
+  MVT ScalarVT = VT.getScalarType();
+
+  unsigned SrcBits = ExtraVT.getScalarType().getSizeInBits();
+  unsigned DestBits = ScalarVT.getSizeInBits();
+  unsigned BitsDiff = DestBits - SrcBits;
+
+  if (!Subtarget->hasBFE())
+    return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+
+  SDValue Src = Op.getOperand(0);
+  if (VT.isVector()) {
+    SDLoc DL(Op);
+    // Need to scalarize this, and revisit each of the scalars later.
+    // TODO: Don't scalarize on Evergreen?
+    unsigned NElts = VT.getVectorNumElements();
+    SmallVector<SDValue, 8> Args;
+    ExtractVectorElements(Src, DAG, Args, 0, NElts);
+
+    SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType());
+    for (unsigned I = 0; I < NElts; ++I)
+      Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp);
+
+    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Args.data(), Args.size());
+  }
+
+  if (SrcBits == 32) {
+    SDLoc DL(Op);
+
+    // If the source is 32-bits, this is really half of a 2-register pair, and
+    // we need to discard the unused half of the pair.
+    SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Src);
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, TruncSrc);
+  }
+
+  unsigned NElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+
+  // TODO: Match 64-bit BFE. SI has a 64-bit BFE, but it's scalar only so it
+  // might not be worth the effort, and will need to expand to shifts when
+  // fixing SGPR copies.
+  if (SrcBits < 32 && DestBits <= 32) {
+    SDLoc DL(Op);
+    MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
+
+    if (DestBits != 32)
+      Src = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVT, Src);
+
+    // FIXME: This should use TargetConstant, but that hits assertions for
+    // Evergreen.
+    SDValue Ext = DAG.getNode(AMDGPUISD::BFE_I32, DL, ExtVT,
+                              Op.getOperand(0), // Operand
+                              DAG.getConstant(0, ExtVT), // Offset
+                              DAG.getConstant(SrcBits, ExtVT)); // Width
+
+    // Truncate to the original type if necessary.
+    if (ScalarVT == MVT::i32)
+      return Ext;
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, Ext);
+  }
+
+  // For small types, extend to 32-bits first.
+  if (SrcBits < 32) {
+    SDLoc DL(Op);
+    MVT ExtVT = (NElts == 1) ? MVT::i32 : MVT::getVectorVT(MVT::i32, NElts);
+
+    SDValue TruncSrc = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, Src);
+    SDValue Ext32 = DAG.getNode(AMDGPUISD::BFE_I32,
+                                DL,
+                                ExtVT,
+                                TruncSrc, // Operand
+                                DAG.getConstant(0, ExtVT), // Offset
+                                DAG.getConstant(SrcBits, ExtVT)); // Width
+
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Ext32);
+  }
+
+  // For everything else, use the standard bitshift expansion.
+  return ExpandSIGN_EXTEND_INREG(Op, BitsDiff, DAG);
+}
+
 //===----------------------------------------------------------------------===//
 // Helper functions
 //===----------------------------------------------------------------------===//
@@ -1019,6 +1128,8 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FMIN)
   NODE_NAME_CASE(SMIN)
   NODE_NAME_CASE(UMIN)
+  NODE_NAME_CASE(BFE_U32)
+  NODE_NAME_CASE(BFE_I32)
   NODE_NAME_CASE(URECIP)
   NODE_NAME_CASE(DOT4)
   NODE_NAME_CASE(EXPORT)
diff --git a/llvm/lib/Target/R600/AMDGPUISelLowering.h b/llvm/lib/Target/R600/AMDGPUISelLowering.h
index 2efb9c78a3e..2595c51d166 100644
--- a/llvm/lib/Target/R600/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/R600/AMDGPUISelLowering.h
@@ -142,6 +142,10 @@ private:
   SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
+
+  SDValue ExpandSIGN_EXTEND_INREG(SDValue Op,
+                                  unsigned BitsDiff,
+                                  SelectionDAG &DAG) const;
   SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
   EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
   SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
@@ -171,6 +175,8 @@ enum {
   UMIN,
   URECIP,
   DOT4,
+  BFE_U32, // Extract range of bits with zero extension to 32-bits.
+  BFE_I32, // Extract range of bits with sign extension to 32-bits.
   TEXTURE_FETCH,
   EXPORT,
   CONST_ADDRESS,
diff --git a/llvm/lib/Target/R600/AMDGPUInstrInfo.td b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
index fccede01ab9..2138bd23a36 100644
--- a/llvm/lib/Target/R600/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/R600/AMDGPUInstrInfo.td
@@ -86,3 +86,7 @@ def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
 
 def AMDGPUround : SDNode<"ISD::FROUND",
                          SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>>;
+
+def AMDGPUbfe_u32 : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
+def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
+
diff --git a/llvm/lib/Target/R600/AMDGPUSubtarget.h b/llvm/lib/Target/R600/AMDGPUSubtarget.h
index 7e7f4d0c004..8874d14c18c 100644
--- a/llvm/lib/Target/R600/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/R600/AMDGPUSubtarget.h
@@ -68,6 +68,15 @@ public:
   enum Generation getGeneration() const;
   bool hasHWFP64() const;
   bool hasCaymanISA() const;
+
+  bool hasBFE() const {
+    return (getGeneration() >= EVERGREEN);
+  }
+
+  bool hasBFM() const {
+    return hasBFE();
+  }
+
   bool IsIRStructurizerEnabled() const;
   bool isIfCvtEnabled() const;
   unsigned getWavefrontSize() const;
diff --git a/llvm/lib/Target/R600/AMDILISelLowering.cpp b/llvm/lib/Target/R600/AMDILISelLowering.cpp
index 970787ef31e..5dfaad4c1c3 100644
--- a/llvm/lib/Target/R600/AMDILISelLowering.cpp
+++ b/llvm/lib/Target/R600/AMDILISelLowering.cpp
@@ -94,9 +94,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
   for (unsigned int x  = 0; x < NumTypes; ++x) {
     MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
-    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
-    // We cannot sextinreg, expand to shifts
-    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
     setOperationAction(ISD::SUBE, VT, Expand);
     setOperationAction(ISD::SUBC, VT, Expand);
     setOperationAction(ISD::ADDE, VT, Expand);
@@ -191,14 +188,12 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
   setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
   setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
   setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
   setOperationAction(ISD::SUBC, MVT::Other, Expand);
   setOperationAction(ISD::ADDE, MVT::Other, Expand);
   setOperationAction(ISD::ADDC, MVT::Other, Expand);
   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   setOperationAction(ISD::BRIND, MVT::Other, Expand);
-  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
 
   // Use the default implementation.
@@ -322,36 +317,6 @@ AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
   return DST;
 }
 
-SDValue
-AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Data = Op.getOperand(0);
-  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
-  SDLoc DL(Op);
-  EVT DVT = Data.getValueType();
-  EVT BVT = BaseType->getVT();
-  unsigned baseBits = BVT.getScalarType().getSizeInBits();
-  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
-  unsigned shiftBits = srcBits - baseBits;
-  if (srcBits < 32) {
-    // If the op is less than 32 bits, then it needs to extend to 32bits
-    // so it can properly keep the upper bits valid.
-    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
-    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
-    shiftBits = 32 - baseBits;
-    DVT = IVT;
-  }
-  SDValue Shift = DAG.getConstant(shiftBits, DVT);
-  // Shift left by 'Shift' bits.
-  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
-  // Signed shift Right by 'Shift' bits.
-  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
-  if (srcBits < 32) {
-    // Once the sign extension is done, the op needs to be converted to
-    // its original type.
-    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
-  }
-  return Data;
-}
 
 EVT
 AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
   int iSize = (size * numEle);
diff --git a/llvm/lib/Target/R600/AMDILIntrinsics.td b/llvm/lib/Target/R600/AMDILIntrinsics.td
index 6ec3559af24..658deb5bc01 100644
--- a/llvm/lib/Target/R600/AMDILIntrinsics.td
+++ b/llvm/lib/Target/R600/AMDILIntrinsics.td
@@ -68,10 +68,6 @@ let TargetPrefix = "AMDIL", isTarget = 1 in {
 let TargetPrefix = "AMDIL", isTarget = 1 in {
   def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt;
 
-  def int_AMDIL_bit_extract_i32 : GCCBuiltin<"__amdil_ibit_extract">,
-          TernaryIntInt;
-  def int_AMDIL_bit_extract_u32 : GCCBuiltin<"__amdil_ubit_extract">,
-          TernaryIntInt;
   def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">,
           UnaryIntInt;
   def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">,
diff --git a/llvm/lib/Target/R600/R600ISelLowering.cpp b/llvm/lib/Target/R600/R600ISelLowering.cpp
index 8c737125c85..4d15321fd02 100644
--- a/llvm/lib/Target/R600/R600ISelLowering.cpp
+++ b/llvm/lib/Target/R600/R600ISelLowering.cpp
@@ -1383,6 +1383,11 @@ SDValue R600TargetLowering::LowerFormalArguments(
     PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                                    AMDGPUAS::CONSTANT_BUFFER_0);
 
+    // i64 isn't a legal type, so the register type used ends up as i32, which
+    // isn't expected here. It attempts to create this sextload, but it ends up
+    // being invalid. Somehow this seems to work with i64 arguments, but breaks
+    // for <1 x i64>.
+
     // The first 36 bytes of the input buffer contains information about
     // thread group and global sizes.
     SDValue Arg = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, Chain,
diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td
index 698ad4afe60..ae3d8747a4d 100644
--- a/llvm/lib/Target/R600/R600Instructions.td
+++ b/llvm/lib/Target/R600/R600Instructions.td
@@ -1517,15 +1517,20 @@ let Predicates = [isEGorCayman] in {
   // Example Usage:
   // (Offset, Width)
   //
-  // (0, 8)           = (Input << 24) >> 24  = (Input &  0xff)       >> 0
-  // (8, 8)           = (Input << 16) >> 24  = (Input &  0xffff)     >> 8
-  // (16,8)           = (Input <<  8) >> 24  = (Input &  0xffffff)   >> 16
-  // (24,8)           = (Input <<  0) >> 24  = (Input &  0xffffffff) >> 24
+  // (0, 8)  = (Input << 24) >> 24 = (Input &  0xff)       >> 0
+  // (8, 8)  = (Input << 16) >> 24 = (Input &  0xffff)     >> 8
+  // (16, 8) = (Input <<  8) >> 24 = (Input &  0xffffff)   >> 16
+  // (24, 8) = (Input <<  0) >> 24 = (Input &  0xffffffff) >> 24
   def BFE_UINT_eg : R600_3OP <0x4, "BFE_UINT",
-    [(set i32:$dst, (int_AMDIL_bit_extract_u32 i32:$src0, i32:$src1,
-                                               i32:$src2))],
+    [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))],
     VecALU
   >;
+
+  def BFE_INT_eg : R600_3OP <0x4, "BFE_INT",
+    [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))],
+    VecALU
+  >;
+
 // XXX: This pattern is broken, disabling for now.  See comment in
 // AMDGPUInstructions.td for more info.
 //  def : BFEPattern <BFE_UINT_eg>;
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 9a18f7bc350..68b89a8c351 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1074,8 +1074,14 @@ def V_CUBEID_F32 : VOP3_32 <0x00000144, "V_CUBEID_F32", []>;
 def V_CUBESC_F32 : VOP3_32 <0x00000145, "V_CUBESC_F32", []>;
 def V_CUBETC_F32 : VOP3_32 <0x00000146, "V_CUBETC_F32", []>;
 def V_CUBEMA_F32 : VOP3_32 <0x00000147, "V_CUBEMA_F32", []>;
-def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32", []>;
-def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32", []>;
+
+let neverHasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+def V_BFE_U32 : VOP3_32 <0x00000148, "V_BFE_U32",
+  [(set i32:$dst, (AMDGPUbfe_u32 i32:$src0, i32:$src1, i32:$src2))]>;
+def V_BFE_I32 : VOP3_32 <0x00000149, "V_BFE_I32",
+  [(set i32:$dst, (AMDGPUbfe_i32 i32:$src0, i32:$src1, i32:$src2))]>;
+}
+
 def V_BFI_B32 : VOP3_32 <0x0000014a, "V_BFI_B32", []>;
 defm : BFIPatterns <V_BFI_B32>;
 def V_FMA_F32 : VOP3_32 <0x0000014b, "V_FMA_F32",
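The fallback path above, ExpandSIGN_EXTEND_INREG, builds the classic shift pair: move the narrow field up to the top of the register with SHL, then arithmetic-shift it back down with SRA so the sign bit is replicated. A minimal standalone sketch of that arithmetic on a plain 32-bit value follows; the function name and the use of uint32_t are illustrative only and not part of the patch.

#include <cassert>
#include <cstdint>

// Sign-extend the low 'SrcBits' bits of 'Value' to a full 32-bit signed
// integer, mirroring the SHL/SRA pair that ExpandSIGN_EXTEND_INREG emits
// when no BFE instruction is available (BitsDiff = DestBits - SrcBits).
int32_t signExtendInReg(uint32_t Value, unsigned SrcBits) {
  assert(SrcBits > 0 && SrcBits <= 32 && "field width out of range");
  unsigned BitsDiff = 32 - SrcBits;
  // The right shift of a negative value is arithmetic here, matching SRA.
  return static_cast<int32_t>(Value << BitsDiff) >> BitsDiff;
}

For example, signExtendInReg(0xFF, 8) yields -1, while signExtendInReg(0x7F, 8) yields 127.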
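The new AMDGPUISD::BFE_U32/BFE_I32 nodes take (Operand, Offset, Width) and extract Width bits starting at bit Offset, zero- or sign-extending the field to 32 bits, as described by the (Offset, Width) table kept above BFE_UINT_eg. A reference-style sketch of that behavior is given below; it assumes Width is at least 1 and Offset + Width does not exceed 32, and does not model hardware edge cases such as a zero width.

#include <cstdint>

// bfe_u32: extract 'Width' bits of 'Input' starting at 'Offset', zero-extended.
uint32_t bfe_u32(uint32_t Input, unsigned Offset, unsigned Width) {
  uint32_t Mask = (Width < 32) ? ((1u << Width) - 1u) : ~0u;
  return (Input >> Offset) & Mask;
}

// bfe_i32: same field, but sign-extended from bit (Width - 1). The signed
// analogue of the first table row: bfe_i32(Input, 0, 8) equals
// (int32_t)(Input << 24) >> 24 with an arithmetic right shift.
int32_t bfe_i32(uint32_t Input, unsigned Offset, unsigned Width) {
  uint32_t Field = bfe_u32(Input, Offset, Width);
  uint32_t SignBit = 1u << (Width - 1);
  return static_cast<int32_t>((Field ^ SignBit) - SignBit);
}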

