Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp | 1850 |
1 file changed, 1850 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp
new file mode 100644
index 00000000000..af991229ba9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp
@@ -0,0 +1,1850 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into
+// a selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+#define ISDBITCAST  ISD::BITCAST
+#define MVTGLUE     MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDGPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue &Src, SDValue &Dst, bool asType)
+{
+  DebugLoc DL = Src.getDebugLoc();
+  EVT svt = Src.getValueType().getScalarType();
+  EVT dvt = Dst.getValueType().getScalarType();
+  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+    if (dvt.bitsGT(svt)) {
+      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+    } else if (svt.bitsGT(dvt)) {
+      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+                        DAG.getConstant(1, MVT::i32));
+    }
+  } else if (svt.isInteger() && dvt.isInteger()) {
+    if (!svt.bitsEq(dvt)) {
+      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+    }
+  } else if (svt.isInteger()) {
+    unsigned opcode = asType ? ISDBITCAST : ISD::SINT_TO_FP;
+    if (!svt.bitsEq(dvt)) {
+      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+      } else {
+        assert(0 && "We only support 32 and 64bit fp types");
+      }
+    }
+    Src = DAG.getNode(opcode, DL, dvt, Src);
+  } else if (dvt.isInteger()) {
+    unsigned opcode = asType ? ISDBITCAST : ISD::FP_TO_SINT;
+    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+    } else {
+      assert(0 && "We only support 32 and 64bit fp types");
+    }
+    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+  }
+  return Src;
+}
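+// For example, when LowerSELECT passes asType == true with an i32 condition
+// and an f32 destination, the conversion above is a single BITCAST; with
+// asType == false the same pair would use SINT_TO_FP instead. Narrow integers
+// are first widened to the integer type matching the fp width, so an
+// i8 -> f32 request sign-extends to i32 before converting.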
+// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC condition.
+static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType &type)
+{
+  switch (CC) {
+  default:
+  {
+    errs() << "Condition Code: " << (unsigned int)CC << "\n";
+    assert(0 && "Unknown condition code!");
+  }
+  case ISD::SETO:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_O;
+    case MVT::f64: return AMDILCC::IL_CC_D_O;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETUO:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_UO;
+    case MVT::f64: return AMDILCC::IL_CC_D_UO;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETGT:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_GT;
+    case MVT::f32: return AMDILCC::IL_CC_F_GT;
+    case MVT::f64: return AMDILCC::IL_CC_D_GT;
+    case MVT::i64: return AMDILCC::IL_CC_L_GT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETGE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_GE;
+    case MVT::f32: return AMDILCC::IL_CC_F_GE;
+    case MVT::f64: return AMDILCC::IL_CC_D_GE;
+    case MVT::i64: return AMDILCC::IL_CC_L_GE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETLT:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_LT;
+    case MVT::f32: return AMDILCC::IL_CC_F_LT;
+    case MVT::f64: return AMDILCC::IL_CC_D_LT;
+    case MVT::i64: return AMDILCC::IL_CC_L_LT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETLE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_LE;
+    case MVT::f32: return AMDILCC::IL_CC_F_LE;
+    case MVT::f64: return AMDILCC::IL_CC_D_LE;
+    case MVT::i64: return AMDILCC::IL_CC_L_LE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETNE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_NE;
+    case MVT::f32: return AMDILCC::IL_CC_F_NE;
+    case MVT::f64: return AMDILCC::IL_CC_D_NE;
+    case MVT::i64: return AMDILCC::IL_CC_L_NE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETEQ:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_I_EQ;
+    case MVT::f32: return AMDILCC::IL_CC_F_EQ;
+    case MVT::f64: return AMDILCC::IL_CC_D_EQ;
+    case MVT::i64: return AMDILCC::IL_CC_L_EQ;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETUGT:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_GT;
+    case MVT::f32: return AMDILCC::IL_CC_F_UGT;
+    case MVT::f64: return AMDILCC::IL_CC_D_UGT;
+    case MVT::i64: return AMDILCC::IL_CC_UL_GT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETUGE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_GE;
+    case MVT::f32: return AMDILCC::IL_CC_F_UGE;
+    case MVT::f64: return AMDILCC::IL_CC_D_UGE;
+    case MVT::i64: return AMDILCC::IL_CC_UL_GE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETULT:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_LT;
+    case MVT::f32: return AMDILCC::IL_CC_F_ULT;
+    case MVT::f64: return AMDILCC::IL_CC_D_ULT;
+    case MVT::i64: return AMDILCC::IL_CC_UL_LT;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETULE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_LE;
+    case MVT::f32: return AMDILCC::IL_CC_F_ULE;
+    case MVT::f64: return AMDILCC::IL_CC_D_ULE;
+    case MVT::i64: return AMDILCC::IL_CC_UL_LE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETUNE:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_NE;
+    case MVT::f32: return AMDILCC::IL_CC_F_UNE;
+    case MVT::f64: return AMDILCC::IL_CC_D_UNE;
+    case MVT::i64: return AMDILCC::IL_CC_UL_NE;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETUEQ:
+    switch (type) {
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
+      return AMDILCC::IL_CC_U_EQ;
+    case MVT::f32: return AMDILCC::IL_CC_F_UEQ;
+    case MVT::f64: return AMDILCC::IL_CC_D_UEQ;
+    case MVT::i64: return AMDILCC::IL_CC_UL_EQ;
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETOGT:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_OGT;
+    case MVT::f64: return AMDILCC::IL_CC_D_OGT;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETOGE:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_OGE;
+    case MVT::f64: return AMDILCC::IL_CC_D_OGE;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETOLT:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_OLT;
+    case MVT::f64: return AMDILCC::IL_CC_D_OLT;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETOLE:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_OLE;
+    case MVT::f64: return AMDILCC::IL_CC_D_OLE;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETONE:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_ONE;
+    case MVT::f64: return AMDILCC::IL_CC_D_ONE;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  case ISD::SETOEQ:
+    switch (type) {
+    case MVT::f32: return AMDILCC::IL_CC_F_OEQ;
+    case MVT::f64: return AMDILCC::IL_CC_D_OEQ;
+    case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
+    default:
+      assert(0 && "Opcode combination not generated correctly!");
+      return AMDILCC::COND_ERROR;
+    }
+  }
+}
+
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+    SDValue Chain,
+    CallingConv::ID CallConv,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl, SelectionDAG &DAG,
+    const CCValAssign &VA,
+    MachineFrameInfo *MFI,
+    unsigned i) const
+{
+  // Create the nodes corresponding to a load from this parameter slot.
+  ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
+    getTargetMachine().Options.GuaranteedTailCallOpt;
+  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+  // FIXME: For now, all byval parameter objects are marked mutable. This can
+  // be changed with more analysis. In case of tail call optimization, mark
+  // all arguments mutable, since they could be overwritten by the lowering
+  // of arguments in case of a tail call.
+  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits() / 8,
+                                  VA.getLocMemOffset(), isImmutable);
+  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+  if (Flags.isByVal())
+    return FIN;
+  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+                     MachinePointerInfo::getFixedStack(FI),
+                     false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+  : TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+  int types[] =
+  {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::f32,
+    (int)MVT::f64,
+    (int)MVT::i64,
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+
+  int IntTypes[] =
+  {
+    (int)MVT::i8,
+    (int)MVT::i16,
+    (int)MVT::i32,
+    (int)MVT::i64
+  };
+
+  int FloatTypes[] =
+  {
+    (int)MVT::f32,
+    (int)MVT::f64
+  };
+
+  int VectorTypes[] =
+  {
+    (int)MVT::v2i8,
+    (int)MVT::v4i8,
+    (int)MVT::v2i16,
+    (int)MVT::v4i16,
+    (int)MVT::v4f32,
+    (int)MVT::v4i32,
+    (int)MVT::v2f32,
+    (int)MVT::v2i32,
+    (int)MVT::v2f64,
+    (int)MVT::v2i64
+  };
+  size_t numTypes = sizeof(types) / sizeof(*types);
+  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
+  // These are the operation actions for the types that are currently
+  // supported.
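+  // setOperationAction tells the legalizer how each (opcode, type) pair is
+  // handled: Legal leaves the node for instruction selection, Expand has the
+  // legalizer rewrite it in terms of other operations, and Custom routes it
+  // through this target's LowerOperation hook below.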
+  for (unsigned int x = 0; x < numTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
+    // We cannot sext-in-reg, so custom-lower it to a pair of shifts
+    // (see LowerSIGN_EXTEND_INREG).
+    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+    setOperationAction(ISD::SUBE, VT, Expand);
+    setOperationAction(ISD::SUBC, VT, Expand);
+    setOperationAction(ISD::ADDE, VT, Expand);
+    setOperationAction(ISD::ADDC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Custom);
+    setOperationAction(ISD::BRCOND, VT, Custom);
+    setOperationAction(ISD::BR_CC, VT, Custom);
+    setOperationAction(ISD::BR_JT, VT, Expand);
+    setOperationAction(ISD::BRIND, VT, Expand);
+    // TODO: Implement custom UREM/SREM routines.
+    setOperationAction(ISD::SREM, VT, Expand);
+    setOperationAction(ISD::GlobalAddress, VT, Custom);
+    setOperationAction(ISD::JumpTable, VT, Custom);
+    setOperationAction(ISD::ConstantPool, VT, Custom);
+    setOperationAction(ISD::SELECT, VT, Custom);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+    if (VT != MVT::i64 && VT != MVT::v2i64) {
+      setOperationAction(ISD::SDIV, VT, Custom);
+    }
+  }
+  for (unsigned int x = 0; x < numFloatTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+    // IL does not have these operations for floating point types.
+    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+    setOperationAction(ISD::SETOLT, VT, Expand);
+    setOperationAction(ISD::SETOGE, VT, Expand);
+    setOperationAction(ISD::SETOGT, VT, Expand);
+    setOperationAction(ISD::SETOLE, VT, Expand);
+    setOperationAction(ISD::SETULT, VT, Expand);
+    setOperationAction(ISD::SETUGE, VT, Expand);
+    setOperationAction(ISD::SETUGT, VT, Expand);
+    setOperationAction(ISD::SETULE, VT, Expand);
+  }
+
+  for (unsigned int x = 0; x < numIntTypes; ++x) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+    // The GPU also does not have a divrem function for signed or unsigned.
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+
+    // The GPU does not have [S|U]MUL_LOHI as a single instruction.
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+    // The GPU doesn't have a rotl, rotr, or byteswap instruction.
+    setOperationAction(ISD::ROTR, VT, Expand);
+    setOperationAction(ISD::BSWAP, VT, Expand);
+
+    // The GPU doesn't have any counting operators.
+    setOperationAction(ISD::CTPOP, VT, Expand);
+    setOperationAction(ISD::CTTZ, VT, Expand);
+    setOperationAction(ISD::CTLZ, VT, Expand);
+  }
+
+  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
+    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+    setOperationAction(ISD::SDIVREM, VT, Expand);
+    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+    // setOperationAction(ISD::VSETCC, VT, Expand);
+    setOperationAction(ISD::SETCC, VT, Expand);
+    setOperationAction(ISD::SELECT_CC, VT, Expand);
+    setOperationAction(ISD::SELECT, VT, Expand);
+  }
+  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
+    setOperationAction(ISD::MULHU, MVT::i64, Expand);
+    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::i64, Expand);
+    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+    setOperationAction(ISD::Constant, MVT::i64, Legal);
+    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+  }
+  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+    // We support loading/storing v2f64 but not operations on the type.
+    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+    // We want to expand vector conversions into their scalar counterparts.
+    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+    setOperationAction(ISD::FABS, MVT::f64, Expand);
+    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+  }
+  // TODO: Fix the UDIV24 algorithm so it works for these types correctly;
+  // this needs vector comparisons to work correctly.
+  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+  setOperationAction(ISD::SUBC, MVT::Other, Expand);
+  setOperationAction(ISD::ADDE, MVT::Other, Expand);
+  setOperationAction(ISD::ADDC, MVT::Other, Expand);
+  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BRIND, MVT::Other, Expand);
+  setOperationAction(ISD::SETCC, MVT::Other, Custom);
+  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+  // Use the default implementation.
+  setOperationAction(ISD::VAARG, MVT::Other, Expand);
+  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+  setOperationAction(ISD::VAEND, MVT::Other, Expand);
+  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
+  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+  setOperationAction(ISD::Constant, MVT::i32, Legal);
+  setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
+  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
+  setSchedulingPreference(Sched::RegPressure);
+  setPow2DivIsCheap(false);
+  setPrefLoopAlignment(16);
+  setSelectIsExpensive(true);
+  setJumpIsExpensive(true);
+
+  maxStoresPerMemcpy = 4096;
+  maxStoresPerMemmove = 4096;
+  maxStoresPerMemset = 4096;
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+  switch (Opcode) {
+  default: return 0;
+  case AMDILISD::CMOVLOG:     return "AMDILISD::CMOVLOG";
+  case AMDILISD::MAD:         return "AMDILISD::MAD";
+  case AMDILISD::CALL:        return "AMDILISD::CALL";
+  case AMDILISD::SELECT_CC:   return "AMDILISD::SELECT_CC";
+  case AMDILISD::UMUL:        return "AMDILISD::UMUL";
+  case AMDILISD::DIV_INF:     return "AMDILISD::DIV_INF";
+  case AMDILISD::VBUILD:      return "AMDILISD::VBUILD";
+  case AMDILISD::CMP:         return "AMDILISD::CMP";
+  case AMDILISD::IL_CC_I_LT:  return "AMDILISD::IL_CC_I_LT";
+  case AMDILISD::IL_CC_I_LE:  return "AMDILISD::IL_CC_I_LE";
+  case AMDILISD::IL_CC_I_GT:  return "AMDILISD::IL_CC_I_GT";
+  case AMDILISD::IL_CC_I_GE:  return "AMDILISD::IL_CC_I_GE";
+  case AMDILISD::IL_CC_I_EQ:  return "AMDILISD::IL_CC_I_EQ";
+  case AMDILISD::IL_CC_I_NE:  return "AMDILISD::IL_CC_I_NE";
+  case AMDILISD::RET_FLAG:    return "AMDILISD::RET_FLAG";
+  case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+  }
+}
+
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+                                        const CallInst &I,
+                                        unsigned Intrinsic) const
+{
+  return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates.
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+  MVT::SimpleValueType SVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return SVT == MVT::f32 || SVT == MVT::f64;
+}
+
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+  MVT::SimpleValueType SVT = VT.getScalarType().getSimpleVT().SimpleTy;
+  return SVT != MVT::f32 && SVT != MVT::f64;
+}
+
+// computeMaskedBitsForTargetNode - Determine which bits of Op are known to be
+// zero or one. Op is expected to be a target-specific node. Used by the DAG
+// combiner.
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+    const SDValue Op,
+    APInt &KnownZero,
+    APInt &KnownOne,
+    const SelectionDAG &DAG,
+    unsigned Depth) const
+{
+  APInt KnownZero2;
+  APInt KnownOne2;
+  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
+  switch (Op.getOpcode()) {
+  default: break;
+  case AMDILISD::SELECT_CC:
+    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
+    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+    assert((KnownZero & KnownOne) == 0
+           && "Bits known to be one AND zero?");
+    assert((KnownZero2 & KnownOne2) == 0
+           && "Bits known to be one AND zero?");
+    // A bit is known only if it is known in both the LHS and the RHS.
+    KnownOne &= KnownOne2;
+    KnownZero &= KnownZero2;
+    break;
+  }
+}
+
+// This is the function that determines which calling convention should be
+// used. Currently there is only one calling convention.
+CCAssignFn *
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers. This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered. This returns an SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+    SDValue Chain,
+    SDValue InFlag,
+    CallingConv::ID CallConv,
+    bool isVarArg,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl,
+    SelectionDAG &DAG,
+    SmallVectorImpl<SDValue> &InVals) const
+{
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+  // Copy all of the result registers out of their specified physregs.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    EVT CopyVT = RVLocs[i].getValVT();
+    if (RVLocs[i].isRegLoc()) {
+      Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+                                 CopyVT, InFlag).getValue(1);
+      SDValue Val = Chain.getValue(0);
+      InFlag = Chain.getValue(2);
+      InVals.push_back(Val);
+    }
+  }
+
+  return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+// Recursively assign SDNodeOrdering to any unordered nodes. This is necessary
+// to maintain source ordering of instructions under -O0 to avoid odd-looking
+// "skipping around" issues.
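+// Each custom-lowered result below inherits the ordering of the node it
+// replaces; the LOWER(A) macro used in LowerOperation expands to
+//   case ISD::A: return Ordered(DAG, DAG.GetOrdering(Op.getNode()),
+//                               LowerA(Op, DAG));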
+static const SDValue
+Ordered(SelectionDAG &DAG, unsigned order, const SDValue New)
+{
+  if (order != 0 && DAG.GetOrdering(New.getNode()) == 0) {
+    DAG.AssignOrdering(New.getNode(), order);
+    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+      Ordered(DAG, order, New.getOperand(i));
+  }
+  return New;
+}
+
+#define LOWER(A) \
+  case ISD::A: \
+    return Ordered(DAG, DAG.GetOrdering(Op.getNode()), Lower##A(Op, DAG))
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+  switch (Op.getOpcode()) {
+  default:
+    Op.getNode()->dump();
+    assert(0 && "Custom lowering code for this "
+           "instruction is not implemented yet!");
+    break;
+  LOWER(GlobalAddress);
+  LOWER(JumpTable);
+  LOWER(ConstantPool);
+  LOWER(ExternalSymbol);
+  LOWER(SDIV);
+  LOWER(SREM);
+  LOWER(BUILD_VECTOR);
+  LOWER(SELECT);
+  LOWER(SETCC);
+  LOWER(SIGN_EXTEND_INREG);
+  LOWER(DYNAMIC_STACKALLOC);
+  LOWER(BRCOND);
+  LOWER(BR_CC);
+  }
+  return Op;
+}
+
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue DST = Op;
+  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+  const GlobalValue *G = GADN->getGlobal();
+  DebugLoc DL = Op.getDebugLoc();
+  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+  if (!GV) {
+    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
+  } else {
+    if (GV->hasInitializer()) {
+      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+        DST = DAG.getConstantFP(CF->getValueAPF(), Op.getValueType());
+      } else if (dyn_cast<ConstantAggregateZero>(C)) {
+        EVT VT = Op.getValueType();
+        if (VT.isInteger()) {
+          DST = DAG.getConstant(0, VT);
+        } else {
+          DST = DAG.getConstantFP(0, VT);
+        }
+      } else {
+        C->dump();
+        assert(!"lowering this type of Global Address "
+               "not implemented yet!");
+        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+      }
+    } else {
+      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+    }
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+  EVT PtrVT = Op.getValueType();
+  SDValue Result;
+  if (CP->isMachineConstantPoolEntry()) {
+    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+  } else {
+    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+  }
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
+  return Result;
+}
+
+/// LowerFormalArguments - Transform physical registers into virtual registers
+/// and generate load operations for arguments placed on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+                                          CallingConv::ID CallConv,
+                                          bool isVarArg,
+                                          const SmallVectorImpl<ISD::InputArg> &Ins,
+                                          DebugLoc dl,
+                                          SelectionDAG &DAG,
+                                          SmallVectorImpl<SDValue> &InVals) const
+{
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  //const Function *Fn = MF.getFunction();
+  //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CallingConv::ID CC = MF.getFunction()->getCallingConv();
+  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *DAG.getContext());
+
+  // When more calling conventions are added, they need to be chosen here.
+  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+  SDValue StackPtr;
+
+  //unsigned int FirstStackArgLoc = 0;
+
+  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    if (VA.isRegLoc()) {
+      EVT RegVT = VA.getLocVT();
+      const TargetRegisterClass *RC =
+        getRegClassFor(RegVT.getSimpleVT().SimpleTy);
+
+      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+      // If this is an 8 or 16-bit value, it is really passed promoted to 32
+      // bits. Insert an assert[sz]ext to capture this, then truncate to the
+      // right size.
+      if (VA.getLocInfo() == CCValAssign::SExt) {
+        ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+        ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+                               DAG.getValueType(VA.getValVT()));
+      }
+      if (VA.getLocInfo() != CCValAssign::Full) {
+        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+      }
+      // Add the value to the list of arguments to be passed in registers.
+      InVals.push_back(ArgValue);
+      if (isVarArg) {
+        assert(0 && "Variable arguments are not yet supported");
+        // See MipsISelLowering.cpp for ideas on how to implement.
+      }
+    } else if (VA.isMemLoc()) {
+      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+                                        dl, DAG, VA, MFI, i));
+    } else {
+      assert(0 && "found a Value Assign that is "
+             "neither a register nor a memory location");
+    }
+  }
+  /*if (hasStructRet) {
+    assert(0 && "Has struct return is not yet implemented");
+    // See MipsISelLowering.cpp for ideas on how to implement.
+  }*/
+
+  if (isVarArg) {
+    assert(0 && "Variable arguments are not yet supported");
+    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement.
+  }
+  // This needs to be changed to non-zero if the return function needs to pop
+  // bytes.
+  return Chain;
+}
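+// Example: an i8 argument arrives promoted in a 32-bit register. The loop
+// above emits CopyFromReg(i32), wraps it in AssertSext/AssertZext to record
+// that only the low 8 bits are significant, and then truncates back to i8.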
+
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
+/// specified by "Src" to address "Dst" with size and alignment information
+/// specified by the specific parameter attribute. The copy will be passed as
+/// a byval function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+  assert(0 && "MemCopy does not exist yet");
+  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+  return DAG.getMemcpy(Chain,
+                       Src.getDebugLoc(),
+                       Dst, Src, SizeNode, Flags.getByValAlign(),
+                       /*IsVol=*/false, /*AlwaysInline=*/true,
+                       MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+                                      SDValue StackPtr, SDValue Arg,
+                                      DebugLoc dl, SelectionDAG &DAG,
+                                      const CCValAssign &VA,
+                                      ISD::ArgFlagsTy Flags) const
+{
+  unsigned int LocMemOffset = VA.getLocMemOffset();
+  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
+  if (Flags.isByVal()) {
+    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+  } else {
+    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+                          MachinePointerInfo::getStack(LocMemOffset),
+                          false, false, 0);
+  }
+  return PtrOff;
+}
+
+/// LowerCall - Function arguments are copied from virtual regs to
+/// (physical regs)/(stack frame); CALLSEQ_START and CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
+                               SmallVectorImpl<SDValue> &InVals) const
+#if 0
+    SDValue Chain, SDValue Callee,
+    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+    bool &isTailCall,
+    const SmallVectorImpl<ISD::OutputArg> &Outs,
+    const SmallVectorImpl<SDValue> &OutVals,
+    const SmallVectorImpl<ISD::InputArg> &Ins,
+    DebugLoc dl, SelectionDAG &DAG,
+#endif
+{
+  CLI.IsTailCall = false;
+  MachineFunction &MF = CLI.DAG.getMachineFunction();
+  // FIXME: Do we need to handle fast calling conventions and tail call
+  // optimizations? See X86/PPC ISelLowering.
+  /*bool hasStructRet = (TheCall->getNumArgs())
+    ? TheCall->getArgFlags(0).device()->isSRet()
+    : false;*/
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
+                 getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
+  // Analyze the calling operands; this needs to change if we have more than
+  // one calling convention.
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));
+
+  unsigned int NumBytes = CCInfo.getNextStackOffset();
+  if (CLI.IsTailCall) {
+    assert(CLI.IsTailCall && "Tail Call not handled yet!");
+    // See X86/PPC ISelLowering.
+  }
+
+  CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
+                                       CLI.DAG.getIntPtrConstant(NumBytes, true));
+
+  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+  SmallVector<SDValue, 8> MemOpChains;
+  SDValue StackPtr;
+  //unsigned int FirstStackArgLoc = 0;
+  //int LastArgStackLoc = 0;
+
+  // Walk the register/memloc assignments, inserting copies/loads.
+  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
+    // Arguments start after the 5 first operands of ISD::CALL.
+    SDValue Arg = CLI.OutVals[i];
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default: assert(0 && "Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = CLI.DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+    } else if (VA.isMemLoc()) {
+      // Create the frame index object for this incoming parameter.
+      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits() / 8,
+                                      VA.getLocMemOffset(), true);
+      SDValue PtrOff = CLI.DAG.getFrameIndex(FI, getPointerTy());
+
+      // Emit an ISD::STORE which stores the parameter value to a stack
+      // location.
+      MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
+                            MachinePointerInfo::getFixedStack(FI),
+                            false, false, 0));
+    } else {
+      assert(0 && "Not a Reg/Mem Loc, major error!");
+    }
+  }
+  if (!MemOpChains.empty()) {
+    CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other,
+                                &MemOpChains[0], MemOpChains.size());
+  }
+  SDValue InFlag;
+  if (!CLI.IsTailCall) {
+    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+      CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain, CLI.DL,
+                                       RegsToPass[i].first,
+                                       RegsToPass[i].second,
+                                       InFlag);
+      InFlag = CLI.Chain.getValue(1);
+    }
+  }
+
+  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
+  // node so that legalize doesn't hack it.
+  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
+    CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL,
+                                                getPointerTy());
+  } else if (ExternalSymbolSDNode *S =
+               dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+    CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(),
+                                                 getPointerTy());
+  } else if (CLI.IsTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // See X86 ISelLowering for ideas on implementation: 1708.
+  }
+
+  SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
+  SmallVector<SDValue, 8> Ops;
+
+  if (CLI.IsTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // See X86 ISelLowering for ideas on implementation: 1721.
+  }
+  // If this is a direct call, pass the chain and the callee.
+  if (CLI.Callee.getNode()) {
+    Ops.push_back(CLI.Chain);
+    Ops.push_back(CLI.Callee);
+  }
+
+  if (CLI.IsTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // See X86 ISelLowering for ideas on implementation: 1739.
+  }
+
+  // Add argument registers to the end of the list so that they are known to
+  // be live into the call.
+  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Ops.push_back(CLI.DAG.getRegister(RegsToPass[i].first,
+                                      RegsToPass[i].second.getValueType()));
+  }
+  if (InFlag.getNode()) {
+    Ops.push_back(InFlag);
+  }
+
+  // Emit the tail call.
+  if (CLI.IsTailCall) {
+    assert(0 && "Tail calls are not handled yet");
+    // See X86 ISelLowering for ideas on implementation: 1762.
+  }
+
+  CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL, CLI.DL,
+                              NodeTys, &Ops[0], Ops.size());
+  InFlag = CLI.Chain.getValue(1);
+
+  // Create the CALLSEQ_END node.
+  CLI.Chain = CLI.DAG.getCALLSEQ_END(CLI.Chain,
+                                     CLI.DAG.getIntPtrConstant(NumBytes, true),
+                                     CLI.DAG.getIntPtrConstant(0, true),
+                                     InFlag);
+  InFlag = CLI.Chain.getValue(1);
+  // Handle result values, copying them out of physregs into vregs that we
+  // return.
+  return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg,
+                         CLI.Ins, CLI.DL, CLI.DAG, InVals);
+}
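+// For a direct call the sequence built above is:
+//   CALLSEQ_START -> stores for stack arguments -> CopyToReg for each
+//   register argument -> AMDILISD::CALL(chain, callee, arg regs, glue) ->
+//   CALLSEQ_END -> LowerCallResult copies each value in RVLocs back out of
+//   its physical register.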
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSDIV64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSDIV32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16
+             || OVT.getScalarType() == MVT::i8) {
+    DST = LowerSDIV24(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT OVT = Op.getValueType();
+  SDValue DST;
+  if (OVT.getScalarType() == MVT::i64) {
+    DST = LowerSREM64(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i32) {
+    DST = LowerSREM32(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i16) {
+    DST = LowerSREM16(Op, DAG);
+  } else if (OVT.getScalarType() == MVT::i8) {
+    DST = LowerSREM8(Op, DAG);
+  } else {
+    DST = SDValue(Op.getNode(), 0);
+  }
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
+{
+  EVT VT = Op.getValueType();
+  SDValue Nodes1;
+  SDValue second;
+  SDValue third;
+  SDValue fourth;
+  DebugLoc DL = Op.getDebugLoc();
+  Nodes1 = DAG.getNode(AMDILISD::VBUILD, DL, VT, Op.getOperand(0));
+#if 0
+  bool allEqual = true;
+  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+    if (Op.getOperand(0) != Op.getOperand(x)) {
+      allEqual = false;
+      break;
+    }
+  }
+  if (allEqual) {
+    return Nodes1;
+  }
+#endif
+  switch (Op.getNumOperands()) {
+  default:
+  case 1:
+    break;
+  case 4:
+    fourth = Op.getOperand(3);
+    if (fourth.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+                           Nodes1, fourth, DAG.getConstant(7, MVT::i32));
+    }
+    // fall through
+  case 3:
+    third = Op.getOperand(2);
+    if (third.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+                           Nodes1, third, DAG.getConstant(6, MVT::i32));
+    }
+    // fall through
+  case 2:
+    second = Op.getOperand(1);
+    if (second.getOpcode() != ISD::UNDEF) {
+      Nodes1 = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
+                           Nodes1, second, DAG.getConstant(5, MVT::i32));
+    }
+    break;
+  }
+  return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond = Op.getOperand(0);
+  SDValue LHS = Op.getOperand(1);
+  SDValue RHS = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  Cond = getConversionNode(DAG, Cond, Op, true);
+  Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, Op.getValueType(),
+                     Cond, LHS, RHS);
+  return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Cond;
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  SDValue CC = Op.getOperand(2);
+  DebugLoc DL = Op.getDebugLoc();
+  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+  unsigned int AMDILCC =
+    CondCCodeToCC(SetCCOpcode, LHS.getValueType().getSimpleVT().SimpleTy);
+  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+  Cond = DAG.getNode(ISD::SELECT_CC, Op.getDebugLoc(), LHS.getValueType(),
+                     LHS, RHS,
+                     DAG.getConstant(-1, MVT::i32),
+                     DAG.getConstant(0, MVT::i32),
+                     CC);
+  Cond = getConversionNode(DAG, Cond, Op, true);
+  // Mask the all-ones compare result down to the single boolean bit.
+  Cond = DAG.getNode(ISD::AND, DL, Cond.getValueType(),
+                     DAG.getConstant(1, Cond.getValueType()), Cond);
+  return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Data = Op.getOperand(0);
+  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+  DebugLoc DL = Op.getDebugLoc();
+  EVT DVT = Data.getValueType();
+  EVT BVT = BaseType->getVT();
+  unsigned baseBits = BVT.getScalarType().getSizeInBits();
+  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+  unsigned shiftBits = srcBits - baseBits;
+  if (srcBits < 32) {
+    // If the op is less than 32 bits, then it needs to extend to 32 bits so
+    // it can properly keep the upper bits valid.
+    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+    shiftBits = 32 - baseBits;
+    DVT = IVT;
+  }
+  SDValue Shift = DAG.getConstant(shiftBits, DVT);
+  // Shift left by 'Shift' bits.
+  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+  // Signed shift right by 'Shift' bits.
+  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+  if (srcBits < 32) {
+    // Once the sign extension is done, the op needs to be converted to its
+    // original type.
+    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+  }
+  return Data;
+}
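+// Worked example: sign_extend_inreg from i8 within an i32 register gives
+// shiftBits = 24, so Data = 0x00000080 becomes 0x80000000 after the SHL and
+// 0xFFFFFF80 (-128) after the arithmetic SRA.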
+
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+  // Map a (scalar size, element count) pair onto the 32- or 64-bit integer
+  // type of the same overall width, e.g. (32, 2) -> v2i32, (64, 1) -> i64.
+  int iSize = (size * numEle);
+  int vEle = (iSize >> ((size == 64) ? 6 : 5));
+  if (!vEle) {
+    vEle = 1;
+  }
+  if (size == 64) {
+    if (vEle == 1) {
+      return EVT(MVT::i64);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i64, vEle));
+    }
+  } else {
+    if (vEle == 1) {
+      return EVT(MVT::i32);
+    } else {
+      return EVT(MVT::getVectorVT(MVT::i32, vEle));
+    }
+  }
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+                                             SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Size = Op.getOperand(1);
+  unsigned int SPReg = AMDGPU::SP;
+  DebugLoc DL = Op.getDebugLoc();
+  SDValue SP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
+  SDValue NewSP = DAG.getNode(ISD::ADD, DL, MVT::i32, SP, Size);
+  Chain = DAG.getCopyToReg(SP.getValue(1), DL, SPReg, NewSP);
+  SDValue Ops[2] = {NewSP, Chain};
+  Chain = DAG.getMergeValues(Ops, 2, DL);
+  return Chain;
+}
+
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue Cond = Op.getOperand(1);
+  SDValue Jump = Op.getOperand(2);
+  SDValue Result;
+  Result = DAG.getNode(AMDILISD::BRANCH_COND, Op.getDebugLoc(),
+                       Op.getValueType(), Chain, Jump, Cond);
+  return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+  SDValue Chain = Op.getOperand(0);
+  SDValue CC = Op.getOperand(1);
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue JumpT = Op.getOperand(4);
+  SDValue CmpValue;
+  SDValue Result;
+  CmpValue = DAG.getNode(ISD::SELECT_CC, Op.getDebugLoc(),
+                         LHS.getValueType(),
+                         LHS, RHS,
+                         DAG.getConstant(-1, MVT::i32),
+                         DAG.getConstant(0, MVT::i32),
+                         CC);
+  Result = DAG.getNode(AMDILISD::BRANCH_COND, CmpValue.getDebugLoc(),
+                       MVT::Other, Chain, JumpT, CmpValue);
+  return Result;
+}
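+// Example: (br_cc setlt, a, b, dest) is lowered above to
+//   cmp = select_cc(a, b, -1, 0, setlt)   ; all-ones when a < b, else 0
+//   AMDILISD::BRANCH_COND(chain, dest, cmp)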
+
+// LowerReturn - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+                                 CallingConv::ID CallConv, bool isVarArg,
+                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                 const SmallVectorImpl<SDValue> &OutVals,
+                                 DebugLoc dl, SelectionDAG &DAG) const
+{
+  //MachineFunction &MF = DAG.getMachineFunction();
+  // CCValAssign - represents the assignment of the return value to a
+  // location.
+  SmallVector<CCValAssign, 16> RVLocs;
+
+  // CCState - Info about the registers and stack slot.
+  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+                 getTargetMachine(), RVLocs, *DAG.getContext());
+
+  // Analyze return values of ISD::RET.
+  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+  // If this is the first return lowered for this function, add the regs to
+  // the liveout set for the function.
+  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+      MRI.addLiveOut(RVLocs[i].getLocReg());
+    }
+  }
+  // FIXME: implement this when tail call is implemented:
+  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+  // both x86 and ppc implement this in ISelLowering.
+
+  // Regular return here.
+  SDValue Flag;
+  SmallVector<SDValue, 6> RetOps;
+  RetOps.push_back(Chain);
+  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+    CCValAssign &VA = RVLocs[i];
+    SDValue ValToCopy = OutVals[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+    // ISD::Ret => ret chain, (regnum1, val1), ...
+    // So i * 2 + 1 indexes only the regnums.
+    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
+    // Glue all the emitted copies together so they are scheduled as a unit.
+    Flag = Chain.getValue(1);
+  }
+  /*if (MF.getFunction()->hasStructRetAttr()) {
+    assert(0 && "Struct returns are not yet implemented!");
+    // Both MIPS and X86 have this.
+  }*/
+  RetOps[0] = Chain;
+  if (Flag.getNode())
+    RetOps.push_back(Flag);
+
+  Flag = DAG.getNode(AMDILISD::RET_FLAG, dl,
+                     MVT::Other, &RetOps[0], RetOps.size());
+  return Flag;
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+  return 0;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  MVT INTTY;
+  MVT FLTTY;
+  if (!OVT.isVector()) {
+    INTTY = MVT::i32;
+    FLTTY = MVT::f32;
+  } else if (OVT.getVectorNumElements() == 2) {
+    INTTY = MVT::v2i32;
+    FLTTY = MVT::v2f32;
+  } else if (OVT.getVectorNumElements() == 4) {
+    INTTY = MVT::v4i32;
+    FLTTY = MVT::v4f32;
+  }
+  unsigned bitsize = OVT.getScalarType().getSizeInBits();
+  // char|short jq = ia ^ ib;
+  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+  // jq = jq >> (bitsize - 2)
+  jq = DAG.getNode(ISD::SRA, DL, OVT, jq,
+                   DAG.getConstant(bitsize - 2, OVT));
+
+  // jq = jq | 0x1
+  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+  // jq = (int)jq
+  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+  // int ia = (int)LHS;
+  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+  // int ib = (int)RHS;
+  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+  // float fa = (float)ia;
+  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+  // float fb = (float)ib;
+  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+  // float fq = native_divide(fa, fb);
+  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+  // fq = trunc(fq);
+  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+  // float fqneg = -fq;
+  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+  // float fr = mad(fqneg, fb, fa);
+  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+  // int iq = (int)fq;
+  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+  // fr = fabs(fr);
+  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+  // fb = fabs(fb);
+  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+  // int cv = fr >= fb;
+  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+
+  // jq = (cv ? jq : 0);
+  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+                   DAG.getConstant(0, OVT));
+  // dst = iq + jq;
+  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+  return iq;
+}
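+// Worked example for the float-based 24-bit division above, 100 / 7 (both
+// operands fit in 24 bits, so the f32 mantissa holds them exactly):
+// fq = 14.28..., truncated to 14; fr = |fa - fq*fb| = |100 - 98| = 2 < 7,
+// so the +/-1 correction jq is suppressed and the result is iq = 14. If
+// native_divide instead returns just under an exact quotient (e.g. 6 / 2
+// giving 2.999...), trunc yields 2 and fr = |6 - 4| = 2 >= 2, so jq = +1 is
+// added back, producing the correct 3.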
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSDIV32 function generates equivalent to the following IL:
+  //   mov r0, LHS
+  //   mov r1, RHS
+  //   ilt r10, r0, 0
+  //   ilt r11, r1, 0
+  //   iadd r0, r0, r10
+  //   iadd r1, r1, r11
+  //   ixor r0, r0, r10
+  //   ixor r1, r1, r11
+  //   udiv r0, r0, r1
+  //   ixor r10, r10, r11
+  //   iadd r0, r0, r10
+  //   ixor DST, r0, r10
+  //
+  // r10 and r11 are all-ones masks when the corresponding operand is
+  // negative, and (x + mask) ^ mask negates x when mask == -1, so the udiv
+  // runs on absolute values and the final iadd/ixor pair restores the sign
+  // of the quotient (r10 ^ r11).
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getSelectCC(DL,
+      r0, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getSelectCC(DL,
+      r1, DAG.getConstant(0, OVT),
+      DAG.getConstant(-1, MVT::i32),
+      DAG.getConstant(0, MVT::i32),
+      ISD::SETLT);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r0, r0, r1
+  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // ixor r10, r10, r11
+  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i8) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i8) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  MVT INTTY = MVT::i32;
+  if (OVT == MVT::v2i16) {
+    INTTY = MVT::v2i32;
+  } else if (OVT == MVT::v4i16) {
+    INTTY = MVT::v4i32;
+  }
+  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+  return LHS;
+}
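+// LowerSREM8 and LowerSREM16 above promote the narrow operands to the
+// matching 32-bit type, reuse the i32 SREM lowering, and truncate the result
+// back; e.g. for i8, -7 % 3 is computed as the i32 remainder -1 and then
+// truncated to i8.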
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+  DebugLoc DL = Op.getDebugLoc();
+  EVT OVT = Op.getValueType();
+  SDValue LHS = Op.getOperand(0);
+  SDValue RHS = Op.getOperand(1);
+  // The LowerSREM32 function generates equivalent to the following IL:
+  //   mov r0, LHS
+  //   mov r1, RHS
+  //   ilt r10, r0, 0
+  //   ilt r11, r1, 0
+  //   iadd r0, r0, r10
+  //   iadd r1, r1, r11
+  //   ixor r0, r0, r10
+  //   ixor r1, r1, r11
+  //   udiv r20, r0, r1
+  //   umul r20, r20, r1
+  //   sub r0, r0, r20
+  //   iadd r0, r0, r10
+  //   ixor DST, r0, r10
+
+  // mov r0, LHS
+  SDValue r0 = LHS;
+
+  // mov r1, RHS
+  SDValue r1 = RHS;
+
+  // ilt r10, r0, 0
+  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r0, DAG.getConstant(0, OVT));
+
+  // ilt r11, r1, 0
+  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+      r1, DAG.getConstant(0, OVT));
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // iadd r1, r1, r11
+  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+  // ixor r0, r0, r10
+  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+  // ixor r1, r1, r11
+  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+  // udiv r20, r0, r1
+  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+  // umul r20, r20, r1
+  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+  // sub r0, r0, r20 (this leaves the unsigned remainder in r0)
+  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+  // iadd r0, r0, r10
+  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+  // ixor DST, r0, r10 (restores the sign of the dividend)
+  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+  return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+  return SDValue(Op.getNode(), 0);
+}