summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp1850
1 files changed, 1850 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp
new file mode 100644
index 00000000000..af991229ba9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDILISelLowering.cpp
@@ -0,0 +1,1850 @@
+//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//==-----------------------------------------------------------------------===//
+//
+// This file implements the interfaces that AMDIL uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDILISelLowering.h"
+#include "AMDILDevices.h"
+#include "AMDILIntrinsicInfo.h"
+#include "AMDILRegisterInfo.h"
+#include "AMDILSubtarget.h"
+#include "AMDILUtilityFunctions.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetOptions.h"
+
+using namespace llvm;
+#define ISDBITCAST ISD::BITCAST
+#define MVTGLUE MVT::Glue
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+#include "AMDGPUGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions Begin
+//===----------------------------------------------------------------------===//
+ static SDValue
+getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
+{
+ DebugLoc DL = Src.getDebugLoc();
+ EVT svt = Src.getValueType().getScalarType();
+ EVT dvt = Dst.getValueType().getScalarType();
+ if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
+ if (dvt.bitsGT(svt)) {
+ Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
+ } else if (svt.bitsLT(svt)) {
+ Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
+ DAG.getConstant(1, MVT::i32));
+ }
+ } else if (svt.isInteger() && dvt.isInteger()) {
+ if (!svt.bitsEq(dvt)) {
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ } else if (svt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
+ if (!svt.bitsEq(dvt)) {
+ if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
+ } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ }
+ Src = DAG.getNode(opcode, DL, dvt, Src);
+ } else if (dvt.isInteger()) {
+ unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
+ if (svt.getSimpleVT().SimpleTy == MVT::f32) {
+ Src = DAG.getNode(opcode, DL, MVT::i32, Src);
+ } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
+ Src = DAG.getNode(opcode, DL, MVT::i64, Src);
+ } else {
+ assert(0 && "We only support 32 and 64bit fp types");
+ }
+ Src = DAG.getSExtOrTrunc(Src, DL, dvt);
+ }
+ return Src;
+}
+// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
+// condition.
+ static AMDILCC::CondCodes
+CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
+{
+ switch (CC) {
+ default:
+ {
+ errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
+ assert(0 && "Unknown condition code!");
+ }
+ case ISD::SETO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_O;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_O;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUO:
+ switch(type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UO;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UO;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_GE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_GE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETLE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_LE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_LE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_NE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_NE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_I_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_EQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_EQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_L_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUGE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_GE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UGE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_GE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULT:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LT;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULT;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LT;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETULE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_LE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ULE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ULE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_LE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUNE:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_NE;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UNE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UNE;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_NE;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETUEQ:
+ switch (type) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ return AMDILCC::IL_CC_U_EQ;
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_UEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_UEQ;
+ case MVT::i64:
+ return AMDILCC::IL_CC_UL_EQ;
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOGE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OGE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OGE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLT:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLT;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLT;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOLE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OLE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OLE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETONE:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_ONE;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_ONE;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ case ISD::SETOEQ:
+ switch (type) {
+ case MVT::f32:
+ return AMDILCC::IL_CC_F_OEQ;
+ case MVT::f64:
+ return AMDILCC::IL_CC_D_OEQ;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::i64:
+ default:
+ assert(0 && "Opcode combination not generated correctly!");
+ return AMDILCC::COND_ERROR;
+ };
+ };
+}
+
+SDValue
+AMDILTargetLowering::LowerMemArgument(
+ SDValue Chain,
+ CallingConv::ID CallConv,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ MachineFrameInfo *MFI,
+ unsigned i) const
+{
+ // Create the nodes corresponding to a load from this parameter slot.
+ ISD::ArgFlagsTy Flags = Ins[i].Flags;
+
+ bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
+ getTargetMachine().Options.GuaranteedTailCallOpt;
+ bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
+
+ // FIXME: For now, all byval parameter objects are marked mutable. This can
+ // be changed with more analysis.
+ // In case of tail call optimization mark all arguments mutable. Since they
+ // could be overwritten by lowering of arguments in case of a tail call.
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+
+ if (Flags.isByVal())
+ return FIN;
+ return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, false, 0);
+}
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation Help Functions End
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Class Implementation Begins
+//===----------------------------------------------------------------------===//
+ AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
+: TargetLowering(TM, new TargetLoweringObjectFileELF())
+{
+ int types[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::f32,
+ (int)MVT::f64,
+ (int)MVT::i64,
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+
+ int IntTypes[] =
+ {
+ (int)MVT::i8,
+ (int)MVT::i16,
+ (int)MVT::i32,
+ (int)MVT::i64
+ };
+
+ int FloatTypes[] =
+ {
+ (int)MVT::f32,
+ (int)MVT::f64
+ };
+
+ int VectorTypes[] =
+ {
+ (int)MVT::v2i8,
+ (int)MVT::v4i8,
+ (int)MVT::v2i16,
+ (int)MVT::v4i16,
+ (int)MVT::v4f32,
+ (int)MVT::v4i32,
+ (int)MVT::v2f32,
+ (int)MVT::v2i32,
+ (int)MVT::v2f64,
+ (int)MVT::v2i64
+ };
+ size_t numTypes = sizeof(types) / sizeof(*types);
+ size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
+ size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
+ size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
+
+ const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
+ // These are the current register classes that are
+ // supported
+
+ for (unsigned int x = 0; x < numTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
+
+ //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
+ // We cannot sextinreg, expand to shifts
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
+ setOperationAction(ISD::SUBE, VT, Expand);
+ setOperationAction(ISD::SUBC, VT, Expand);
+ setOperationAction(ISD::ADDE, VT, Expand);
+ setOperationAction(ISD::ADDC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::BRCOND, VT, Custom);
+ setOperationAction(ISD::BR_CC, VT, Custom);
+ setOperationAction(ISD::BR_JT, VT, Expand);
+ setOperationAction(ISD::BRIND, VT, Expand);
+ // TODO: Implement custom UREM/SREM routines
+ setOperationAction(ISD::SREM, VT, Expand);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ if (VT != MVT::i64 && VT != MVT::v2i64) {
+ setOperationAction(ISD::SDIV, VT, Custom);
+ }
+ }
+ for (unsigned int x = 0; x < numFloatTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
+
+ // IL does not have these operations for floating point types
+ setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
+ setOperationAction(ISD::SETOLT, VT, Expand);
+ setOperationAction(ISD::SETOGE, VT, Expand);
+ setOperationAction(ISD::SETOGT, VT, Expand);
+ setOperationAction(ISD::SETOLE, VT, Expand);
+ setOperationAction(ISD::SETULT, VT, Expand);
+ setOperationAction(ISD::SETUGE, VT, Expand);
+ setOperationAction(ISD::SETUGT, VT, Expand);
+ setOperationAction(ISD::SETULE, VT, Expand);
+ }
+
+ for (unsigned int x = 0; x < numIntTypes; ++x) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
+
+ // GPU also does not have divrem function for signed or unsigned
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+
+ // GPU does not have [S|U]MUL_LOHI functions as a single instruction
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+
+ // GPU doesn't have a rotl, rotr, or byteswap instruction
+ setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::BSWAP, VT, Expand);
+
+ // GPU doesn't have any counting operators
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ }
+
+ for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
+ {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
+ setOperationAction(ISD::SDIVREM, VT, Expand);
+ setOperationAction(ISD::SMUL_LOHI, VT, Expand);
+ // setOperationAction(ISD::VSETCC, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Expand);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::SELECT, VT, Expand);
+
+ }
+ if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
+ setOperationAction(ISD::ADD, MVT::v2i64, Expand);
+ setOperationAction(ISD::SREM, MVT::v2i64, Expand);
+ setOperationAction(ISD::Constant , MVT::i64 , Legal);
+ setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
+ }
+ if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
+ // we support loading/storing v2f64 but not operations on the type
+ setOperationAction(ISD::FADD, MVT::v2f64, Expand);
+ setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
+ setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
+ setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
+ // We want to expand vector conversions into their scalar
+ // counterparts.
+ setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
+ setOperationAction(ISD::FABS, MVT::f64, Expand);
+ setOperationAction(ISD::FABS, MVT::v2f64, Expand);
+ }
+ // TODO: Fix the UDIV24 algorithm so it works for these
+ // types correctly. This needs vector comparisons
+ // for this to work correctly.
+ setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
+ setOperationAction(ISD::SUBC, MVT::Other, Expand);
+ setOperationAction(ISD::ADDE, MVT::Other, Expand);
+ setOperationAction(ISD::ADDC, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ setOperationAction(ISD::BR_CC, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
+ setOperationAction(ISD::SETCC, MVT::Other, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
+ // Use the default implementation.
+ setOperationAction(ISD::VAARG , MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+ setOperationAction(ISD::VAEND , MVT::Other, Expand);
+ setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
+ setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
+ setOperationAction(ISD::Constant , MVT::i32 , Legal);
+ setOperationAction(ISD::TRAP , MVT::Other , Legal);
+
+ setStackPointerRegisterToSaveRestore(AMDGPU::SP);
+ setSchedulingPreference(Sched::RegPressure);
+ setPow2DivIsCheap(false);
+ setPrefLoopAlignment(16);
+ setSelectIsExpensive(true);
+ setJumpIsExpensive(true);
+
+ maxStoresPerMemcpy = 4096;
+ maxStoresPerMemmove = 4096;
+ maxStoresPerMemset = 4096;
+
+#undef numTypes
+#undef numIntTypes
+#undef numVectorTypes
+#undef numFloatTypes
+}
+
+const char *
+AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
+{
+ switch (Opcode) {
+ default: return 0;
+ case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
+ case AMDILISD::MAD: return "AMDILISD::MAD";
+ case AMDILISD::CALL: return "AMDILISD::CALL";
+ case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
+ case AMDILISD::UMUL: return "AMDILISD::UMUL";
+ case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
+ case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
+ case AMDILISD::CMP: return "AMDILISD::CMP";
+ case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
+ case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
+ case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
+ case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
+ case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
+ case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
+ case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
+ case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
+
+ };
+}
+bool
+AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I, unsigned Intrinsic) const
+{
+ return false;
+}
+
+// The backend supports 32 and 64 bit floating point immediates
+bool
+AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return true;
+ } else {
+ return false;
+ }
+}
+
+bool
+AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
+{
+ if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
+ || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
+ return false;
+ } else {
+ return true;
+ }
+}
+
+
+// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
+// be zero. Op is expected to be a target specific node. Used by DAG
+// combiner.
+
+void
+AMDILTargetLowering::computeMaskedBitsForTargetNode(
+ const SDValue Op,
+ APInt &KnownZero,
+ APInt &KnownOne,
+ const SelectionDAG &DAG,
+ unsigned Depth) const
+{
+ APInt KnownZero2;
+ APInt KnownOne2;
+ KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
+ switch (Op.getOpcode()) {
+ default: break;
+ case AMDILISD::SELECT_CC:
+ DAG.ComputeMaskedBits(
+ Op.getOperand(1),
+ KnownZero,
+ KnownOne,
+ Depth + 1
+ );
+ DAG.ComputeMaskedBits(
+ Op.getOperand(0),
+ KnownZero2,
+ KnownOne2
+ );
+ assert((KnownZero & KnownOne) == 0
+ && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0
+ && "Bits known to be one AND zero?");
+ // Only known if known in both the LHS and RHS
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ break;
+ };
+}
+
+// This is the function that determines which calling convention should
+// be used. Currently there is only one calling convention
+CCAssignFn*
+AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
+{
+ //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ return CC_AMDIL32;
+}
+
+// LowerCallResult - Lower the result values of an ISD::CALL into the
+// appropriate copies out of appropriate physical registers. This assumes that
+// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
+// being lowered. The returns a SDNode with the same number of values as the
+// ISD::CALL.
+SDValue
+AMDILTargetLowering::LowerCallResult(
+ SDValue Chain,
+ SDValue InFlag,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const
+{
+ // Assign locations to each value returned by this call
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ EVT CopyVT = RVLocs[i].getValVT();
+ if (RVLocs[i].isRegLoc()) {
+ Chain = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ RVLocs[i].getLocReg(),
+ CopyVT,
+ InFlag
+ ).getValue(1);
+ SDValue Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Val);
+ }
+ }
+
+ return Chain;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Other Lowering Hooks
+//===----------------------------------------------------------------------===//
+
+// Recursively assign SDNodeOrdering to any unordered nodes
+// This is necessary to maintain source ordering of instructions
+// under -O0 to avoid odd-looking "skipping around" issues.
+ static const SDValue
+Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
+{
+ if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
+ DAG.AssignOrdering( New.getNode(), order );
+ for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
+ Ordered( DAG, order, New.getOperand(i) );
+ }
+ return New;
+}
+
+#define LOWER(A) \
+ case ISD:: A: \
+return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
+
+SDValue
+AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
+{
+ switch (Op.getOpcode()) {
+ default:
+ Op.getNode()->dump();
+ assert(0 && "Custom lowering code for this"
+ "instruction is not implemented yet!");
+ break;
+ LOWER(GlobalAddress);
+ LOWER(JumpTable);
+ LOWER(ConstantPool);
+ LOWER(ExternalSymbol);
+ LOWER(SDIV);
+ LOWER(SREM);
+ LOWER(BUILD_VECTOR);
+ LOWER(SELECT);
+ LOWER(SETCC);
+ LOWER(SIGN_EXTEND_INREG);
+ LOWER(DYNAMIC_STACKALLOC);
+ LOWER(BRCOND);
+ LOWER(BR_CC);
+ }
+ return Op;
+}
+
+#undef LOWER
+
+SDValue
+AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue DST = Op;
+ const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
+ const GlobalValue *G = GADN->getGlobal();
+ DebugLoc DL = Op.getDebugLoc();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
+ if (!GV) {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ } else {
+ if (GV->hasInitializer()) {
+ const Constant *C = dyn_cast<Constant>(GV->getInitializer());
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
+ DST = DAG.getConstant(CI->getValue(), Op.getValueType());
+ } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
+ DST = DAG.getConstantFP(CF->getValueAPF(),
+ Op.getValueType());
+ } else if (dyn_cast<ConstantAggregateZero>(C)) {
+ EVT VT = Op.getValueType();
+ if (VT.isInteger()) {
+ DST = DAG.getConstant(0, VT);
+ } else {
+ DST = DAG.getConstantFP(0, VT);
+ }
+ } else {
+ assert(!"lowering this type of Global Address "
+ "not implemented yet!");
+ C->dump();
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ } else {
+ DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
+ }
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
+{
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+ SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+ return Result;
+}
+SDValue
+AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
+{
+ ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
+ EVT PtrVT = Op.getValueType();
+ SDValue Result;
+ if (CP->isMachineConstantPoolEntry()) {
+ Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ } else {
+ Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
+ CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
+ }
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
+{
+ const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+ SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
+ return Result;
+}
+
+/// LowerFORMAL_ARGUMENTS - transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments places on the stack.
+/// TODO: isVarArg, hasStructRet, isMemReg
+ SDValue
+AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl,
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals)
+const
+{
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ //const Function *Fn = MF.getFunction();
+ //MachineRegisterInfo &RegInfo = MF.getRegInfo();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CallingConv::ID CC = MF.getFunction()->getCallingConv();
+ //bool hasStructRet = MF.getFunction()->hasStructRetAttr();
+
+ CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *DAG.getContext());
+
+ // When more calling conventions are added, they need to be chosen here
+ CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
+ SDValue StackPtr;
+
+ //unsigned int FirstStackArgLoc = 0;
+
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC = getRegClassFor(
+ RegVT.getSimpleVT().SimpleTy);
+
+ unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(
+ Chain,
+ dl,
+ Reg,
+ RegVT);
+ // If this is an 8 or 16-bit value, it is really passed
+ // promoted to 32 bits. Insert an assert[sz]ext to capture
+ // this, then truncate to the right size.
+
+ if (VA.getLocInfo() == CCValAssign::SExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertSext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ } else if (VA.getLocInfo() == CCValAssign::ZExt) {
+ ArgValue = DAG.getNode(
+ ISD::AssertZext,
+ dl,
+ RegVT,
+ ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ }
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ ArgValue = DAG.getNode(
+ ISD::TRUNCATE,
+ dl,
+ VA.getValVT(),
+ ArgValue);
+ }
+ // Add the value to the list of arguments
+ // to be passed in registers
+ InVals.push_back(ArgValue);
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }
+ } else if(VA.isMemLoc()) {
+ InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
+ dl, DAG, VA, MFI, i));
+ } else {
+ assert(0 && "found a Value Assign that is "
+ "neither a register or a memory location");
+ }
+ }
+ /*if (hasStructRet) {
+ assert(0 && "Has struct return is not yet implemented");
+ // See MipsISelLowering.cpp for ideas on how to implement
+ }*/
+
+ if (isVarArg) {
+ assert(0 && "Variable arguments are not yet supported");
+ // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
+ }
+ // This needs to be changed to non-zero if the return function needs
+ // to pop bytes
+ return Chain;
+}
+/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
+/// by "Src" to address "Dst" with size and alignment information specified by
+/// the specific parameter attribute. The copy will be passed as a byval
+/// function parameter.
+static SDValue
+CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
+ ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
+ assert(0 && "MemCopy does not exist yet");
+ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
+
+ return DAG.getMemcpy(Chain,
+ Src.getDebugLoc(),
+ Dst, Src, SizeNode, Flags.getByValAlign(),
+ /*IsVol=*/false, /*AlwaysInline=*/true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
+SDValue
+AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
+ SDValue StackPtr, SDValue Arg,
+ DebugLoc dl, SelectionDAG &DAG,
+ const CCValAssign &VA,
+ ISD::ArgFlagsTy Flags) const
+{
+ unsigned int LocMemOffset = VA.getLocMemOffset();
+ SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
+ PtrOff = DAG.getNode(ISD::ADD,
+ dl,
+ getPointerTy(), StackPtr, PtrOff);
+ if (Flags.isByVal()) {
+ PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
+ } else {
+ PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
+ MachinePointerInfo::getStack(LocMemOffset),
+ false, false, 0);
+ }
+ return PtrOff;
+}
+/// LowerCAL - functions arguments are copied from virtual
+/// regs to (physical regs)/(stack frame), CALLSEQ_START and
+/// CALLSEQ_END are emitted.
+/// TODO: isVarArg, isTailCall, hasStructRet
+SDValue
+AMDILTargetLowering::LowerCall(CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const
+
+#if 0
+ SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
+ bool& isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+#endif
+{
+ CLI.IsTailCall = false;
+ MachineFunction& MF = CLI.DAG.getMachineFunction();
+ // FIXME: DO we need to handle fast calling conventions and tail call
+ // optimizations?? X86/PPC ISelLowering
+ /*bool hasStructRet = (TheCall->getNumArgs())
+ ? TheCall->getArgFlags(0).device()->isSRet()
+ : false;*/
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, CLI.DAG.getMachineFunction(),
+ getTargetMachine(), ArgLocs, *CLI.DAG.getContext());
+ // Analyize the calling operands, but need to change
+ // if we have more than one calling convetion
+ CCInfo.AnalyzeCallOperands(CLI.Outs, CCAssignFnForNode(CLI.CallConv));
+
+ unsigned int NumBytes = CCInfo.getNextStackOffset();
+ if (CLI.IsTailCall) {
+ assert(CLI.IsTailCall && "Tail Call not handled yet!");
+ // See X86/PPC ISelLowering
+ }
+
+ CLI.Chain = CLI.DAG.getCALLSEQ_START(CLI.Chain,
+ CLI.DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+ SDValue StackPtr;
+ //unsigned int FirstStacArgLoc = 0;
+ //int LastArgStackLoc = 0;
+
+ // Walk the register/memloc assignments, insert copies/loads
+ for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
+ // Arguments start after the 5 first operands of ISD::CALL
+ SDValue Arg = CLI.OutVals[i];
+ //Promote the value if needed
+ switch(VA.getLocInfo()) {
+ default: assert(0 && "Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = CLI.DAG.getNode(ISD::SIGN_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = CLI.DAG.getNode(ISD::ZERO_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = CLI.DAG.getNode(ISD::ANY_EXTEND,
+ CLI.DL,
+ VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else if (VA.isMemLoc()) {
+ // Create the frame index object for this incoming parameter
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ VA.getLocMemOffset(), true);
+ SDValue PtrOff = CLI.DAG.getFrameIndex(FI,getPointerTy());
+
+ // emit ISD::STORE whichs stores the
+ // parameter value to a stack Location
+ MemOpChains.push_back(CLI.DAG.getStore(CLI.Chain, CLI.DL, Arg, PtrOff,
+ MachinePointerInfo::getFixedStack(FI),
+ false, false, 0));
+ } else {
+ assert(0 && "Not a Reg/Mem Loc, major error!");
+ }
+ }
+ if (!MemOpChains.empty()) {
+ CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor,
+ CLI.DL,
+ MVT::Other,
+ &MemOpChains[0],
+ MemOpChains.size());
+ }
+ SDValue InFlag;
+ if (!CLI.IsTailCall) {
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ CLI.Chain = CLI.DAG.getCopyToReg(CLI.Chain,
+ CLI.DL,
+ RegsToPass[i].first,
+ RegsToPass[i].second,
+ InFlag);
+ InFlag = CLI.Chain.getValue(1);
+ }
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
+ // every direct call is) turn it into a TargetGlobalAddress/
+ // TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(CLI.Callee)) {
+ CLI.Callee = CLI.DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, getPointerTy());
+ }
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(CLI.Callee)) {
+ CLI.Callee = CLI.DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
+ }
+ else if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1708
+ }
+
+ SDVTList NodeTys = CLI.DAG.getVTList(MVT::Other, MVTGLUE);
+ SmallVector<SDValue, 8> Ops;
+
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1721
+ }
+ // If this is a direct call, pass the chain and the callee
+ if (CLI.Callee.getNode()) {
+ Ops.push_back(CLI.Chain);
+ Ops.push_back(CLI.Callee);
+ }
+
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1739
+ }
+
+ // Add argument registers to the end of the list so that they are known
+ // live into the call
+ for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(CLI.DAG.getRegister(
+ RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ // Emit Tail Call
+ if (CLI.IsTailCall) {
+ assert(0 && "Tail calls are not handled yet");
+ // see X86 ISelLowering for ideas on implementation: 1762
+ }
+
+ CLI.Chain = CLI.DAG.getNode(AMDILISD::CALL,
+ CLI.DL,
+ NodeTys, &Ops[0], Ops.size());
+ InFlag = CLI.Chain.getValue(1);
+
+ // Create the CALLSEQ_END node
+ CLI.Chain = CLI.DAG.getCALLSEQ_END(
+ CLI.Chain,
+ CLI.DAG.getIntPtrConstant(NumBytes, true),
+ CLI.DAG.getIntPtrConstant(0, true),
+ InFlag);
+ InFlag = CLI.Chain.getValue(1);
+ // Handle result values, copying them out of physregs into vregs that
+ // we return
+ return LowerCallResult(CLI.Chain, InFlag, CLI.CallConv, CLI.IsVarArg, CLI.Ins, CLI.DL, CLI.DAG,
+ InVals);
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSDIV64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSDIV32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16
+ || OVT.getScalarType() == MVT::i8) {
+ DST = LowerSDIV24(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
+{
+ EVT OVT = Op.getValueType();
+ SDValue DST;
+ if (OVT.getScalarType() == MVT::i64) {
+ DST = LowerSREM64(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i32) {
+ DST = LowerSREM32(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i16) {
+ DST = LowerSREM16(Op, DAG);
+ } else if (OVT.getScalarType() == MVT::i8) {
+ DST = LowerSREM8(Op, DAG);
+ } else {
+ DST = SDValue(Op.getNode(), 0);
+ }
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
+{
+ EVT VT = Op.getValueType();
+ SDValue Nodes1;
+ SDValue second;
+ SDValue third;
+ SDValue fourth;
+ DebugLoc DL = Op.getDebugLoc();
+ Nodes1 = DAG.getNode(AMDILISD::VBUILD,
+ DL,
+ VT, Op.getOperand(0));
+#if 0
+ bool allEqual = true;
+ for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
+ if (Op.getOperand(0) != Op.getOperand(x)) {
+ allEqual = false;
+ break;
+ }
+ }
+ if (allEqual) {
+ return Nodes1;
+ }
+#endif
+ switch(Op.getNumOperands()) {
+ default:
+ case 1:
+ break;
+ case 4:
+ fourth = Op.getOperand(3);
+ if (fourth.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ fourth,
+ DAG.getConstant(7, MVT::i32));
+ }
+ case 3:
+ third = Op.getOperand(2);
+ if (third.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ third,
+ DAG.getConstant(6, MVT::i32));
+ }
+ case 2:
+ second = Op.getOperand(1);
+ if (second.getOpcode() != ISD::UNDEF) {
+ Nodes1 = DAG.getNode(
+ ISD::INSERT_VECTOR_ELT,
+ DL,
+ Op.getValueType(),
+ Nodes1,
+ second,
+ DAG.getConstant(5, MVT::i32));
+ }
+ break;
+ };
+ return Nodes1;
+}
+
+SDValue
+AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond = Op.getOperand(0);
+ SDValue LHS = Op.getOperand(1);
+ SDValue RHS = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(AMDILISD::CMOVLOG,
+ DL,
+ Op.getValueType(), Cond, LHS, RHS);
+ return Cond;
+}
+SDValue
+AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Cond;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue CC = Op.getOperand(2);
+ DebugLoc DL = Op.getDebugLoc();
+ ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ unsigned int AMDILCC = CondCCodeToCC(
+ SetCCOpcode,
+ LHS.getValueType().getSimpleVT().SimpleTy);
+ assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
+ Cond = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Cond = getConversionNode(DAG, Cond, Op, true);
+ Cond = DAG.getNode(
+ ISD::AND,
+ DL,
+ Cond.getValueType(),
+ DAG.getConstant(1, Cond.getValueType()),
+ Cond);
+ return Cond;
+}
+
+SDValue
+AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Data = Op.getOperand(0);
+ VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
+ DebugLoc DL = Op.getDebugLoc();
+ EVT DVT = Data.getValueType();
+ EVT BVT = BaseType->getVT();
+ unsigned baseBits = BVT.getScalarType().getSizeInBits();
+ unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
+ unsigned shiftBits = srcBits - baseBits;
+ if (srcBits < 32) {
+ // If the op is less than 32 bits, then it needs to extend to 32bits
+ // so it can properly keep the upper bits valid.
+ EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
+ Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
+ shiftBits = 32 - baseBits;
+ DVT = IVT;
+ }
+ SDValue Shift = DAG.getConstant(shiftBits, DVT);
+ // Shift left by 'Shift' bits.
+ Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
+ // Signed shift Right by 'Shift' bits.
+ Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
+ if (srcBits < 32) {
+ // Once the sign extension is done, the op needs to be converted to
+ // its original type.
+ Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
+ }
+ return Data;
+}
+EVT
+AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
+{
+ int iSize = (size * numEle);
+ int vEle = (iSize >> ((size == 64) ? 6 : 5));
+ if (!vEle) {
+ vEle = 1;
+ }
+ if (size == 64) {
+ if (vEle == 1) {
+ return EVT(MVT::i64);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i64, vEle));
+ }
+ } else {
+ if (vEle == 1) {
+ return EVT(MVT::i32);
+ } else {
+ return EVT(MVT::getVectorVT(MVT::i32, vEle));
+ }
+ }
+}
+
+SDValue
+AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Size = Op.getOperand(1);
+ unsigned int SPReg = AMDGPU::SP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue SP = DAG.getCopyFromReg(Chain,
+ DL,
+ SPReg, MVT::i32);
+ SDValue NewSP = DAG.getNode(ISD::ADD,
+ DL,
+ MVT::i32, SP, Size);
+ Chain = DAG.getCopyToReg(SP.getValue(1),
+ DL,
+ SPReg, NewSP);
+ SDValue Ops[2] = {NewSP, Chain};
+ Chain = DAG.getMergeValues(Ops, 2 ,DL);
+ return Chain;
+}
+SDValue
+AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue Cond = Op.getOperand(1);
+ SDValue Jump = Op.getOperand(2);
+ SDValue Result;
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ Op.getDebugLoc(),
+ Op.getValueType(),
+ Chain, Jump, Cond);
+ return Result;
+}
+
+SDValue
+AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
+{
+ SDValue Chain = Op.getOperand(0);
+ SDValue CC = Op.getOperand(1);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue JumpT = Op.getOperand(4);
+ SDValue CmpValue;
+ SDValue Result;
+ CmpValue = DAG.getNode(
+ ISD::SELECT_CC,
+ Op.getDebugLoc(),
+ LHS.getValueType(),
+ LHS, RHS,
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ CC);
+ Result = DAG.getNode(
+ AMDILISD::BRANCH_COND,
+ CmpValue.getDebugLoc(),
+ MVT::Other, Chain,
+ JumpT, CmpValue);
+ return Result;
+}
+
+// LowerRET - Lower an ISD::RET node.
+SDValue
+AMDILTargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ DebugLoc dl, SelectionDAG &DAG)
+const
+{
+ //MachineFunction& MF = DAG.getMachineFunction();
+ // CCValAssign - represent the assignment of the return value
+ // to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
+
+ // Analyze return values of ISD::RET
+ CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
+ MRI.addLiveOut(RVLocs[i].getLocReg());
+ }
+ }
+ // FIXME: implement this when tail call is implemented
+ // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
+ // both x86 and ppc implement this in ISelLowering
+
+ // Regular return here
+ SDValue Flag;
+ SmallVector<SDValue, 6> RetOps;
+ RetOps.push_back(Chain);
+ RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
+ for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ SDValue ValToCopy = OutVals[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ // ISD::Ret => ret chain, (regnum1, val1), ...
+ // So i * 2 + 1 index only the regnums
+ Chain = DAG.getCopyToReg(Chain,
+ dl,
+ VA.getLocReg(),
+ ValToCopy,
+ Flag);
+ // guarantee that all emitted copies are stuck together
+ // avoiding something bad
+ Flag = Chain.getValue(1);
+ }
+ /*if (MF.getFunction()->hasStructRetAttr()) {
+ assert(0 && "Struct returns are not yet implemented!");
+ // Both MIPS and X86 have this
+ }*/
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ Flag = DAG.getNode(AMDILISD::RET_FLAG,
+ dl,
+ MVT::Other, &RetOps[0], RetOps.size());
+ return Flag;
+}
+
+unsigned int
+AMDILTargetLowering::getFunctionAlignment(const Function *) const
+{
+ return 0;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib, (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv;
+ if (INTTY == MVT::i32) {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ } else {
+ cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+ }
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, MVT::i32),
+ DAG.getConstant(0, MVT::i32),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i8) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i8) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ MVT INTTY = MVT::i32;
+ if (OVT == MVT::v2i16) {
+ INTTY = MVT::v2i32;
+ } else if (OVT == MVT::v4i16) {
+ INTTY = MVT::v4i32;
+ }
+ SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
+ SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
+ LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
+ LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
+ return LHS;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
+{
+ DebugLoc DL = Op.getDebugLoc();
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r0, DAG.getConstant(0, OVT));
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
+ DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
+ r1, DAG.getConstant(0, OVT));
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue
+AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
+{
+ return SDValue(Op.getNode(), 0);
+}
OpenPOWER on IntegriCloud