Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/Target/TargetLowering.h | 37
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 232
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 14
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringBase.cpp | 6
-rw-r--r--  llvm/lib/Target/Mips/MipsCCState.cpp | 64
-rw-r--r--  llvm/lib/Target/Mips/MipsCCState.h | 34
-rw-r--r--  llvm/lib/Target/Mips/MipsCallingConv.td | 10
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.cpp | 74
-rw-r--r--  llvm/lib/Target/Mips/MipsISelLowering.h | 27
-rw-r--r--  llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 4
-rw-r--r--  llvm/test/CodeGen/Mips/cconv/vector.ll | 1657
-rw-r--r--  llvm/test/CodeGen/Mips/ctlz-v.ll | 12
-rw-r--r--  llvm/test/CodeGen/Mips/cttz-v.ll | 19
-rw-r--r--  llvm/test/CodeGen/Mips/return-vector.ll | 33
15 files changed, 2121 insertions, 104 deletions
diff --git a/llvm/include/llvm/Target/TargetLowering.h b/llvm/include/llvm/Target/TargetLowering.h
index 240896a538f..6a350a2169b 100644
--- a/llvm/include/llvm/Target/TargetLowering.h
+++ b/llvm/include/llvm/Target/TargetLowering.h
@@ -662,6 +662,16 @@ public:
unsigned &NumIntermediates,
MVT &RegisterVT) const;
+ /// Certain targets such as MIPS require that some types such as vectors are
+ /// always broken down into scalars in some contexts. This occurs even if the
+ /// vector type is legal.
+ virtual unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+ return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ }
+
struct IntrinsicInfo {
unsigned opc = 0; // target opcode
EVT memVT; // memory VT
@@ -1002,6 +1012,33 @@ public:
llvm_unreachable("Unsupported extended type!");
}
+ /// Certain combinations of ABIs, targets and features require that types
+ /// are legal for some operations but not for others.
+ /// For MIPS, all vector types must be passed through the integer register set.
+ virtual MVT getRegisterTypeForCallingConv(MVT VT) const {
+ return getRegisterType(VT);
+ }
+
+ virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ return getRegisterType(Context, VT);
+ }
+
+ /// Certain targets require unusual breakdowns of certain types. For MIPS,
+ /// this occurs when a vector type is used, as vectors are passed through the
+ /// integer register set.
+ virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ return getNumRegisters(Context, VT);
+ }
+
+ /// Certain targets have context-sensitive alignment requirements, where one
+ /// type has the alignment requirement of another type.
+ virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
+ DataLayout DL) const {
+ return DL.getABITypeAlignment(ArgTy);
+ }
+
/// If true, then instruction selection should seek to shrink the FP constant
/// of the specified type to a smaller type in order to save space and / or
/// reduce runtime.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 315d841cf3c..43887a2e348 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -115,7 +115,8 @@ static const unsigned MaxParallelChains = 64;
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V);
+ MVT PartVT, EVT ValueVT, const Value *V,
+ bool IsABIRegCopy);
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
@@ -125,10 +126,11 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, const Value *V,
- Optional<ISD::NodeType> AssertOp = None) {
+ Optional<ISD::NodeType> AssertOp = None,
+ bool IsABIRegCopy = false) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
- PartVT, ValueVT, V);
+ PartVT, ValueVT, V, IsABIRegCopy);
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -272,7 +274,8 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
const SDValue *Parts, unsigned NumParts,
- MVT PartVT, EVT ValueVT, const Value *V) {
+ MVT PartVT, EVT ValueVT, const Value *V,
+ bool IsABIRegCopy) {
assert(ValueVT.isVector() && "Not a vector value");
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -283,9 +286,18 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- unsigned NumRegs =
- TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
- NumIntermediates, RegisterVT);
+ unsigned NumRegs;
+
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
@@ -314,9 +326,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
+ EVT BuiltVectorTy =
+ EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
+ (IntermediateVT.isVector()
+ ? IntermediateVT.getVectorNumElements() * NumParts
+ : NumIntermediates));
Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
: ISD::BUILD_VECTOR,
- DL, ValueVT, Ops);
+ DL, BuiltVectorTy, Ops);
}
// There is now one part, held in Val. Correct it to match ValueVT.
@@ -355,13 +372,30 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- // Handle cases such as i8 -> <1 x i1>
if (ValueVT.getVectorNumElements() != 1) {
- diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
- "non-trivial scalar-to-vector conversion");
+
+ // Certain ABIs require that vectors are passed as integers. For vectors
+ // of the same size, this is an obvious bitcast.
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+ } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
+ // Bitcast Val back to the original type and extract the corresponding
+ // vector we want.
+ unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
+ EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
+ ValueVT.getVectorElementType(), Elts);
+ Val = DAG.getBitcast(WiderVecType, Val);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
+ diagnosePossiblyInvalidConstraint(
+ *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
return DAG.getUNDEF(ValueVT);
}
+ // Handle cases such as i8 -> <1 x i1>
if (ValueVT.getVectorNumElements() == 1 &&
ValueVT.getVectorElementType() != PartEVT)
Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType());
@@ -371,7 +405,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V);
+ MVT PartVT, const Value *V, bool IsABIRegCopy);
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
@@ -379,12 +413,14 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
SDValue *Parts, unsigned NumParts, MVT PartVT,
const Value *V,
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
+ bool IsABIRegCopy = false) {
EVT ValueVT = Val.getValueType();
// Handle the vector case separately.
if (ValueVT.isVector())
- return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V);
+ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
+ IsABIRegCopy);
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -509,7 +545,9 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
- MVT PartVT, const Value *V) {
+ MVT PartVT, const Value *V,
+ bool IsABIRegCopy) {
+
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -550,15 +588,22 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
// Promoted vector extract
Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
- } else{
+ } else {
// Vector -> scalar conversion.
- assert(ValueVT.getVectorNumElements() == 1 &&
- "Only trivial vector-to-scalar conversions should get here!");
- Val = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
- DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (ValueVT.getVectorNumElements() == 1) {
+ Val = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
- Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ } else {
+ assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
+ "lossy conversion of vector to scalar type");
+ EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(),
+ ValueVT.getSizeInBits());
+ Val = DAG.getBitcast(IntermediateType, Val);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
+ }
}
Parts[0] = Val;
@@ -569,15 +614,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
- unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
- IntermediateVT,
- NumIntermediates, RegisterVT);
+ unsigned NumRegs;
+ if (IsABIRegCopy) {
+ NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
+ *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
+ RegisterVT);
+ } else {
+ NumRegs =
+ TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
+ NumIntermediates, RegisterVT);
+ }
unsigned NumElements = ValueVT.getVectorNumElements();
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
+ // Convert the vector to the appropriate type if necessary.
+ unsigned DestVectorNoElts =
+ NumIntermediates *
+ (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
+ EVT BuiltVectorTy = EVT::getVectorVT(
+ *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
+ if (Val.getValueType() != BuiltVectorTy)
+ Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -610,22 +671,31 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
}
-RegsForValue::RegsForValue() {}
+RegsForValue::RegsForValue() { IsABIMangled = false; }
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
- EVT valuevt)
- : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+ EVT valuevt, bool IsABIMangledValue)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
+ RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty) {
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ bool IsABIMangledValue) {
ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+ IsABIMangled = IsABIMangledValue;
+
for (EVT ValueVT : ValueVTs) {
- unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
- MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+ unsigned NumRegs = IsABIMangledValue
+ ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
+ : TLI.getNumRegisters(Context, ValueVT);
+ MVT RegisterVT = IsABIMangledValue
+ ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
+ : TLI.getRegisterType(Context, ValueVT);
for (unsigned i = 0; i != NumRegs; ++i)
Regs.push_back(Reg + i);
RegVTs.push_back(RegisterVT);
+ RegCount.push_back(NumRegs);
Reg += NumRegs;
}
}
@@ -646,8 +716,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
// Copy the legal parts from the registers.
EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ unsigned NumRegs = RegCount[Value];
+ MVT RegisterVT = IsABIMangled
+ ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+ : RegVTs[Value];
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
@@ -742,9 +814,11 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
- MVT RegisterVT = RegVTs[Value];
+ unsigned NumParts = RegCount[Value];
+
+ MVT RegisterVT = IsABIMangled
+ ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
+ : RegVTs[Value];
if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
ExtendKind = ISD::ZERO_EXTEND;
@@ -967,10 +1041,16 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
if (It != FuncInfo.ValueMap.end()) {
unsigned InReg = It->second;
+ bool IsABIRegCopy =
+ V && ((isa<CallInst>(V) &&
+ !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+ isa<ReturnInst>(V));
+
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), InReg, Ty);
+ DAG.getDataLayout(), InReg, Ty, IsABIRegCopy);
SDValue Chain = DAG.getEntryNode();
- Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
+ Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
+ V);
resolveDanglingDebugInfo(V, Result);
}
@@ -1157,8 +1237,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
// If this is an instruction which fast-isel has deferred, select it now.
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ bool IsABIRegCopy =
+ V && ((isa<CallInst>(V) &&
+ !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+ isa<ReturnInst>(V));
+
RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
- Inst->getType());
+ Inst->getType(), IsABIRegCopy);
SDValue Chain = DAG.getEntryNode();
return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
}
@@ -1386,12 +1471,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
- unsigned NumParts = TLI.getNumRegisters(Context, VT);
- MVT PartVT = TLI.getRegisterType(Context, VT);
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
+ MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
SmallVector<SDValue, 4> Parts(NumParts);
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, &I, ExtendKind);
+ &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
@@ -7064,8 +7149,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
- Chain, &Flag, CS.getInstruction());
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ CS.getInstruction());
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -7681,8 +7766,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
} else {
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT =
+ getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
MyFlags.VT = RegisterVT;
@@ -7731,7 +7818,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+ // Certain targets (such as MIPS) may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
if (Args[i].IsZExt)
Flags.setZExt();
@@ -7786,8 +7877,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setInConsecutiveRegs();
Flags.setOrigAlign(OriginalAlignment);
- MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT PartVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumParts =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
@@ -7817,7 +7909,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind);
+ CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
+ true);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -7917,12 +8010,14 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned CurReg = 0;
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
EVT VT = RetTys[I];
- MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
- unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
+ MVT RegisterVT =
+ getRegisterTypeForCallingConv(CLI.RetTy->getContext(), VT);
+ unsigned NumRegs =
+ getNumRegistersForCallingConv(CLI.RetTy->getContext(), VT);
ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
NumRegs, RegisterVT, VT, nullptr,
- AssertOp));
+ AssertOp, true));
CurReg += NumRegs;
}
@@ -7958,8 +8053,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ // If this is an InlineAsm we have to match the registers required, not the
+ // notional registers required by the type.
+ bool IsABIRegCopy =
+ V && ((isa<CallInst>(V) &&
+ !(static_cast<const CallInst *>(V))->isInlineAsm()) ||
+ isa<ReturnInst>(V));
+
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType());
+ V->getType(), IsABIRegCopy);
SDValue Chain = DAG.getEntryNode();
ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) ==
@@ -8202,7 +8304,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
EVT VT = ValueVTs[Value];
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
ISD::ArgFlagsTy Flags;
- unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
+
+ // Certain targets (such as MIPS) may have a different ABI alignment
+ // for a type depending on the context. Give the target a chance to
+ // specify the alignment it wants.
+ unsigned OriginalAlignment =
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL);
if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt))
Flags.setZExt();
@@ -8264,8 +8371,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
- MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+ MVT RegisterVT =
+ TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+ unsigned NumRegs =
+ TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed,
Idx-1, PartBase+i*RegisterVT.getStoreSize());
@@ -8372,8 +8481,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
for (unsigned Val = 0; Val != NumValues; ++Val) {
EVT VT = ValueVTs[Val];
- MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
- unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
+ MVT PartVT =
+ TLI->getRegisterTypeForCallingConv(*CurDAG->getContext(), VT);
+ unsigned NumParts =
+ TLI->getNumRegistersForCallingConv(*CurDAG->getContext(), VT);
// Even an apparant 'unused' swifterror argument needs to be returned. So
// we do generate a copy for it that can be used on return from the
@@ -8386,7 +8497,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
AssertOp = ISD::AssertZext;
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
- PartVT, VT, nullptr, AssertOp));
+ PartVT, VT, nullptr, AssertOp,
+ true));
}
i += NumParts;
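The IsABIRegCopy predicate above is computed the same way in three places in this file (getCopyFromRegs, getValueImpl and CopyValueToVirtualRegister). A minimal sketch of that predicate as a standalone helper is shown below; the function name valueIsABIRegCopy is invented for illustration and does not appear in the patch:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"

// Sketch of the predicate used above: a value takes part in an ABI register
// copy when it is the result of a call that is not inline asm, or is a
// return instruction. The helper name is hypothetical.
static bool valueIsABIRegCopy(const llvm::Value *V) {
  if (!V)
    return false;
  if (const auto *CI = llvm::dyn_cast<llvm::CallInst>(V))
    return !CI->isInlineAsm();
  return llvm::isa<llvm::ReturnInst>(V);
}

Inline-asm call results are excluded because their registers are dictated by the asm constraints rather than by the calling convention.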
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index c6acc09b660..b24a513f3c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -973,18 +973,28 @@ struct RegsForValue {
/// expanded value requires multiple registers.
SmallVector<unsigned, 4> Regs;
+ /// This list holds the number of registers for each value.
+ SmallVector<unsigned, 4> RegCount;
+
+ /// Records if this value needs to be treated in an ABI-dependent manner,
+ /// different from normal type legalization.
+ bool IsABIMangled;
+
RegsForValue();
- RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt);
+ RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
+ bool IsABIMangledValue = false);
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
- const DataLayout &DL, unsigned Reg, Type *Ty);
+ const DataLayout &DL, unsigned Reg, Type *Ty,
+ bool IsABIMangledValue = false);
/// Add the specified values to this one.
void append(const RegsForValue &RHS) {
ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ RegCount.push_back(RHS.Regs.size());
}
/// Emit a series of CopyFromReg nodes that copies from this value and returns
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index d27e2455978..1e2dc3a9f23 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -835,7 +835,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
// completely and make statepoint call to return a tuple.
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
- DAG.getDataLayout(), Reg, RetTy);
+ DAG.getDataLayout(), Reg, RetTy, true);
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index fc147633966..c9ecd8ae0f9 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1616,8 +1616,10 @@ void llvm::GetReturnInfo(Type *ReturnType, AttributeList attr,
VT = MinVT;
}
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned NumParts =
+ TLI.getNumRegistersForCallingConv(ReturnType->getContext(), VT);
+ MVT PartVT =
+ TLI.getRegisterTypeForCallingConv(ReturnType->getContext(), VT);
// 'inreg' on function refers to return value
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
diff --git a/llvm/lib/Target/Mips/MipsCCState.cpp b/llvm/lib/Target/Mips/MipsCCState.cpp
index 7af988c1f64..62ff99c7816 100644
--- a/llvm/lib/Target/Mips/MipsCCState.cpp
+++ b/llvm/lib/Target/Mips/MipsCCState.cpp
@@ -54,6 +54,22 @@ static bool originalTypeIsF128(Type *Ty, const SDNode *CallNode) {
return (ES && Ty->isIntegerTy(128) && isF128SoftLibCall(ES->getSymbol()));
}
+/// Return true if the original type was vXfXX.
+static bool originalEVTTypeIsVectorFloat(EVT Ty) {
+ if (Ty.isVector() && Ty.getVectorElementType().isFloatingPoint())
+ return true;
+
+ return false;
+}
+
+/// Return true if the original type was vXfXX.
+static bool originalTypeIsVectorFloat(Type * Ty) {
+ if (Ty->isVectorTy() && Ty->isFPOrFPVectorTy())
+ return true;
+
+ return false;
+}
+
MipsCCState::SpecialCallingConvType
MipsCCState::getSpecialCallingConvForCallee(const SDNode *Callee,
const MipsSubtarget &Subtarget) {
@@ -81,8 +97,8 @@ void MipsCCState::PreAnalyzeCallResultForF128(
}
}
-/// Identify lowered values that originated from f128 arguments and record
-/// this for use by RetCC_MipsN.
+/// Identify lowered values that originated from f128 or float arguments and
+/// record this for use by RetCC_MipsN.
void MipsCCState::PreAnalyzeReturnForF128(
const SmallVectorImpl<ISD::OutputArg> &Outs) {
const MachineFunction &MF = getMachineFunction();
@@ -94,26 +110,50 @@ void MipsCCState::PreAnalyzeReturnForF128(
}
}
-/// Identify lowered values that originated from f128 arguments and record
+/// Identify lowered values that originated from vXfXX and record
+/// this.
+void MipsCCState::PreAnalyzeCallResultForVectorFloat(
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI) {
+ for (unsigned i = 0; i < Ins.size(); ++i) {
+ OriginalRetWasFloatVector.push_back(
+ originalTypeIsVectorFloat(CLI.RetTy));
+ }
+}
+
+/// Identify lowered values that originated from vXfXX arguments and record
/// this.
+void MipsCCState::PreAnalyzeReturnForVectorFloat(
+ const SmallVectorImpl<ISD::OutputArg> &Outs) {
+ for (unsigned i = 0; i < Outs.size(); ++i) {
+ ISD::OutputArg Out = Outs[i];
+ OriginalRetWasFloatVector.push_back(
+ originalEVTTypeIsVectorFloat(Out.ArgVT));
+ }
+}
+/// Identify lowered values that originated from f128, float and sret to vXfXX
+/// arguments and record this.
void MipsCCState::PreAnalyzeCallOperands(
const SmallVectorImpl<ISD::OutputArg> &Outs,
std::vector<TargetLowering::ArgListEntry> &FuncArgs,
const SDNode *CallNode) {
for (unsigned i = 0; i < Outs.size(); ++i) {
- OriginalArgWasF128.push_back(
- originalTypeIsF128(FuncArgs[Outs[i].OrigArgIndex].Ty, CallNode));
- OriginalArgWasFloat.push_back(
- FuncArgs[Outs[i].OrigArgIndex].Ty->isFloatingPointTy());
+ TargetLowering::ArgListEntry FuncArg = FuncArgs[Outs[i].OrigArgIndex];
+
+ OriginalArgWasF128.push_back(originalTypeIsF128(FuncArg.Ty, CallNode));
+ OriginalArgWasFloat.push_back(FuncArg.Ty->isFloatingPointTy());
+
+ OriginalArgWasFloatVector.push_back(FuncArg.Ty->isVectorTy());
CallOperandIsFixed.push_back(Outs[i].IsFixed);
}
}
-/// Identify lowered values that originated from f128 arguments and record
-/// this.
+/// Identify lowered values that originated from f128, float and vXfXX arguments
+/// and record this.
void MipsCCState::PreAnalyzeFormalArgumentsForF128(
const SmallVectorImpl<ISD::InputArg> &Ins) {
const MachineFunction &MF = getMachineFunction();
+
for (unsigned i = 0; i < Ins.size(); ++i) {
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
@@ -123,6 +163,7 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
if (Ins[i].Flags.isSRet()) {
OriginalArgWasF128.push_back(false);
OriginalArgWasFloat.push_back(false);
+ OriginalArgWasFloatVector.push_back(false);
continue;
}
@@ -132,5 +173,10 @@ void MipsCCState::PreAnalyzeFormalArgumentsForF128(
OriginalArgWasF128.push_back(
originalTypeIsF128(FuncArg->getType(), nullptr));
OriginalArgWasFloat.push_back(FuncArg->getType()->isFloatingPointTy());
+
+ // The MIPS vector ABI exhibits a quirk: if the first argument is
+ // actually an SRet pointer to a vector, then the next argument slot
+ // is $a2.
+ OriginalArgWasFloatVector.push_back(FuncArg->getType()->isVectorTy());
}
}
diff --git a/llvm/lib/Target/Mips/MipsCCState.h b/llvm/lib/Target/Mips/MipsCCState.h
index 081c393a09b..d86bb85126b 100644
--- a/llvm/lib/Target/Mips/MipsCCState.h
+++ b/llvm/lib/Target/Mips/MipsCCState.h
@@ -45,16 +45,33 @@ private:
const SDNode *CallNode);
/// Identify lowered values that originated from f128 arguments and record
- /// this.
+ /// this for use by RetCC_MipsN.
void
PreAnalyzeFormalArgumentsForF128(const SmallVectorImpl<ISD::InputArg> &Ins);
+ void PreAnalyzeCallResultForVectorFloat(
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const TargetLowering::CallLoweringInfo &CLI);
+
+ void PreAnalyzeFormalArgumentsForVectorFloat(
+ const SmallVectorImpl<ISD::InputArg> &Ins);
+
+ void
+ PreAnalyzeReturnForVectorFloat(const SmallVectorImpl<ISD::OutputArg> &Outs);
+
/// Records whether the value has been lowered from an f128.
SmallVector<bool, 4> OriginalArgWasF128;
/// Records whether the value has been lowered from float.
SmallVector<bool, 4> OriginalArgWasFloat;
+ /// Records whether the value has been lowered from a floating point vector.
+ SmallVector<bool, 4> OriginalArgWasFloatVector;
+
+ /// Records whether the return value has been lowered from a floating point
+ /// vector.
+ SmallVector<bool, 4> OriginalRetWasFloatVector;
+
/// Records whether the value was a fixed argument.
/// See ISD::OutputArg::IsFixed,
SmallVector<bool, 4> CallOperandIsFixed;
@@ -78,6 +95,7 @@ public:
CCState::AnalyzeCallOperands(Outs, Fn);
OriginalArgWasF128.clear();
OriginalArgWasFloat.clear();
+ OriginalArgWasFloatVector.clear();
CallOperandIsFixed.clear();
}
@@ -96,31 +114,38 @@ public:
CCState::AnalyzeFormalArguments(Ins, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn,
const TargetLowering::CallLoweringInfo &CLI) {
PreAnalyzeCallResultForF128(Ins, CLI);
+ PreAnalyzeCallResultForVectorFloat(Ins, CLI);
CCState::AnalyzeCallResult(Ins, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
PreAnalyzeReturnForF128(Outs);
+ PreAnalyzeReturnForVectorFloat(Outs);
CCState::AnalyzeReturn(Outs, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
}
bool CheckReturn(const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
CCAssignFn Fn) {
PreAnalyzeReturnForF128(ArgsFlags);
+ PreAnalyzeReturnForVectorFloat(ArgsFlags);
bool Return = CCState::CheckReturn(ArgsFlags, Fn);
OriginalArgWasFloat.clear();
OriginalArgWasF128.clear();
+ OriginalArgWasFloatVector.clear();
return Return;
}
@@ -128,6 +153,13 @@ public:
bool WasOriginalArgFloat(unsigned ValNo) {
return OriginalArgWasFloat[ValNo];
}
+ bool WasOriginalArgVectorFloat(unsigned ValNo) const {
+ return OriginalArgWasFloatVector[ValNo];
+ }
+ bool WasOriginalRetVectorFloat(unsigned ValNo) const {
+ return OriginalRetWasFloatVector[ValNo];
+ }
+
bool IsCallOperandFixed(unsigned ValNo) { return CallOperandIsFixed[ValNo]; }
SpecialCallingConvType getSpecialCallingConv() { return SpecialCallingConv; }
};
diff --git a/llvm/lib/Target/Mips/MipsCallingConv.td b/llvm/lib/Target/Mips/MipsCallingConv.td
index a57cb7badc1..b5df78f89a6 100644
--- a/llvm/lib/Target/Mips/MipsCallingConv.td
+++ b/llvm/lib/Target/Mips/MipsCallingConv.td
@@ -37,6 +37,10 @@ class CCIfOrigArgWasF128<CCAction A>
class CCIfArgIsVarArg<CCAction A>
: CCIf<"!static_cast<MipsCCState *>(&State)->IsCallOperandFixed(ValNo)", A>;
+/// Match if the original return type was not a floating point vector.
+class CCIfOrigArgWasNotVectorFloat<CCAction A>
+ : CCIf<"!static_cast<MipsCCState *>(&State)"
+ "->WasOriginalRetVectorFloat(ValNo)", A>;
/// Match if the special calling conv is the specified value.
class CCIfSpecialCallingConv<string CC, CCAction A>
@@ -93,8 +97,10 @@ def RetCC_MipsO32 : CallingConv<[
// Promote i1/i8/i16 return values to i32.
CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
- // i32 are returned in registers V0, V1, A0, A1
- CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
+ // i32 are returned in registers V0, V1, A0, A1, unless the original return
+ // type was a vector of floats.
+ CCIfOrigArgWasNotVectorFloat<CCIfType<[i32],
+ CCAssignToReg<[V0, V1, A0, A1]>>>,
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 93c5f496ce9..a726e25f0b0 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -71,6 +71,48 @@ static bool isShiftedMask(uint64_t I, uint64_t &Pos, uint64_t &Size) {
return true;
}
+// The MIPS MSA ABI passes vector arguments in the integer register set.
+// The number of integer registers used is dependent on the ABI in use.
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(MVT VT) const {
+ if (VT.isVector() && Subtarget.hasMSA())
+ return Subtarget.isABI_O32() ? MVT::i32 : MVT::i64;
+ return MipsTargetLowering::getRegisterType(VT);
+}
+
+MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (VT.isVector()) {
+ if (Subtarget.isABI_O32()) {
+ return MVT::i32;
+ } else {
+ return (VT.getSizeInBits() == 32) ? MVT::i32 : MVT::i64;
+ }
+ }
+ return MipsTargetLowering::getRegisterType(Context, VT);
+}
+
+unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const {
+ if (VT.isVector())
+ return std::max((VT.getSizeInBits() / (Subtarget.isABI_O32() ? 32 : 64)),
+ 1U);
+ return MipsTargetLowering::getNumRegisters(Context, VT);
+}
+
+unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const {
+
+ // Break down vector types to either 2 i64s or 4 i32s.
+ RegisterVT = getRegisterTypeForCallingConv(Context, VT);
+ IntermediateVT = RegisterVT;
+ NumIntermediates = VT.getSizeInBits() < RegisterVT.getSizeInBits()
+ ? VT.getVectorNumElements()
+ : VT.getSizeInBits() / RegisterVT.getSizeInBits();
+
+ return NumIntermediates;
+}
+
SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
MipsFunctionInfo *FI = DAG.getMachineFunction().getInfo<MipsFunctionInfo>();
return DAG.getRegister(FI->getGlobalBaseReg(), Ty);
@@ -2515,6 +2557,11 @@ SDValue MipsTargetLowering::lowerFP_TO_SINT(SDValue Op,
// yet to hold an argument. Otherwise, use A2, A3 and stack. If A1 is
// not used, it must be shadowed. If only A3 is available, shadow it and
// go to stack.
+// vXiX - Received as scalarized i32s, passed in A0 - A3 and the stack.
+// vXf32 - Passed in either a pair of registers {A0, A1}, {A2, A3} or {A0 - A3}
+// with the remainder spilled to the stack.
+// vXf64 - Passed in either {A0, A1, A2, A3} or {A2, A3} and in both cases
+// spilling the remainder to the stack.
//
// For vararg functions, all arguments are passed in A0, A1, A2, A3 and stack.
//===----------------------------------------------------------------------===//
@@ -2526,8 +2573,13 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
State.getMachineFunction().getSubtarget());
static const MCPhysReg IntRegs[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 };
+
+ const MipsCCState * MipsState = static_cast<MipsCCState *>(&State);
+
static const MCPhysReg F32Regs[] = { Mips::F12, Mips::F14 };
+ static const MCPhysReg FloatVectorIntRegs[] = { Mips::A0, Mips::A2 };
+
// Do not process byval args here.
if (ArgFlags.isByVal())
return true;
@@ -2565,8 +2617,26 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, MVT LocVT,
State.getFirstUnallocated(F32Regs) != ValNo;
unsigned OrigAlign = ArgFlags.getOrigAlign();
bool isI64 = (ValVT == MVT::i32 && OrigAlign == 8);
-
- if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
+ bool isVectorFloat = MipsState->WasOriginalArgVectorFloat(ValNo);
+
+ // The MIPS vector ABI for floats passes them in a pair of registers
+ if (ValVT == MVT::i32 && isVectorFloat) {
+ // This is the start of a vector that was scalarized into an unknown number
+ // of components. It doesn't matter how many there are. Allocate one of the
+ // notional 8 byte aligned registers which map onto the argument stack, and
+ // shadow the register lost to alignment requirements.
+ if (ArgFlags.isSplit()) {
+ Reg = State.AllocateReg(FloatVectorIntRegs);
+ if (Reg == Mips::A2)
+ State.AllocateReg(Mips::A1);
+ else if (Reg == 0)
+ State.AllocateReg(Mips::A3);
+ } else {
+ // If we're an intermediate component of the split, we can just attempt to
+ // allocate a register directly.
+ Reg = State.AllocateReg(IntRegs);
+ }
+ } else if (ValVT == MVT::i32 || (ValVT == MVT::f32 && AllocateFloatsInIntReg)) {
Reg = State.AllocateReg(IntRegs);
// If this is the first part of an i64 arg,
// the allocated register must be either A0 or A2.
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 2dcafd51061..0e47ed38f42 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -248,6 +248,33 @@ namespace llvm {
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
+ /// Return the register type for a given MVT, ensuring vectors are treated
+ /// as a series of gpr sized integers.
+ virtual MVT getRegisterTypeForCallingConv(MVT VT) const override;
+
+ /// Return the register type for a given MVT, ensuring vectors are treated
+ /// as a series of gpr sized integers.
+ virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
+ EVT VT) const override;
+
+ /// Return the number of registers for a given MVT, ensuring vectors are
+ /// treated as a series of gpr sized integers.
+ virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
+ EVT VT) const override;
+
+ /// Break down vectors to the correct number of gpr sized integers.
+ virtual unsigned getVectorTypeBreakdownForCallingConv(
+ LLVMContext &Context, EVT VT, EVT &IntermediateVT,
+ unsigned &NumIntermediates, MVT &RegisterVT) const override;
+
+ /// Return the correct alignment for the current calling convention.
+ virtual unsigned
+ getABIAlignmentForCallingConv(Type *ArgTy, DataLayout DL) const override {
+ if (ArgTy->isVectorTy())
+ return std::min(DL.getABITypeAlignment(ArgTy), 8U);
+ return DL.getABITypeAlignment(ArgTy);
+ }
+
ISD::NodeType getExtendForAtomicOps() const override {
return ISD::SIGN_EXTEND;
}
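Numerically, the overrides declared above reduce to: the GPR width is 32 bits under O32 and 64 bits under N32/N64, and a vector occupies max(vector size / GPR width, 1) registers. The snippet below is a standalone restatement of that arithmetic with no LLVM dependency; the table it prints is only a worked example under those assumptions:

#include <algorithm>
#include <cstdio>

// Mirror of getNumRegistersForCallingConv above: a vector occupies
// max(vector-bits / gpr-bits, 1) integer registers.
static unsigned numPieces(unsigned VectorBits, unsigned GPRBits) {
  return std::max(VectorBits / GPRBits, 1u);
}

int main() {
  // 32-bit vectors are special-cased to a single i32 even under N64 by
  // getRegisterTypeForCallingConv, so they are left out of this table.
  const unsigned Widths[] = {16, 64, 128}; // e.g. v2i8, v2f32, v4f32
  for (unsigned Bits : Widths)
    std::printf("%3u-bit vector: O32 -> %u x i32, N32/N64 -> %u x i64\n",
                Bits, numPieces(Bits, 32), numPieces(Bits, 64));
  return 0;
}

So an MSA-sized 128-bit vector such as <16 x i8> or <4 x float> travels in four GPRs ($4-$7) under O32 and in two under N64, which matches what the cconv/vector.ll test added below checks.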
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 65be350f259..625c80b9d68 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -283,10 +283,12 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
uint64_t stackSize = MF.getFrameInfo().getStackSize();
int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+ unsigned alignment = MF.getFrameInfo().getObjectAlignment(FrameIndex);
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
- << "stackSize : " << stackSize << "\n");
+ << "stackSize : " << stackSize << "\n"
+ << "alignment : " << alignment << "\n");
eliminateFI(MI, FIOperandNum, FrameIndex, stackSize, spOffset);
}
diff --git a/llvm/test/CodeGen/Mips/cconv/vector.ll b/llvm/test/CodeGen/Mips/cconv/vector.ll
new file mode 100644
index 00000000000..5a88d064fe7
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/cconv/vector.ll
@@ -0,0 +1,1657 @@
+; RUN: llc < %s -march=mips -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EB
+; RUN: llc < %s -march=mips -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EB
+; RUN: llc < %s -march=mips64 -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+; RUN: llc < %s -march=mipsel -mcpu=mips32 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32,MIPS32EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64 -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64,MIPS64EL
+; RUN: llc < %s -march=mipsel -mcpu=mips32r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS32R5,MIPS32R5EL
+; RUN: llc < %s -march=mips64el -relocation-model=pic -mcpu=mips64r5 -mattr=+fp64,+msa -disable-mips-delay-filler | FileCheck %s --check-prefixes=ALL,MIPS64R5
+
+
+
+; Test that vector types are passed through the integer register set whether or
+; not MSA is enabled. This is an ABI requirement for MIPS. For GCC compatibility
+; we need to handle any power-of-2 number of elements. We will test this
+; exhaustively for combinations up to the MSA register size (128 bits).
+
+; First set of tests are for argument passing.
+
+define <2 x i8> @i8_2(<2 x i8> %a, <2 x i8> %b) {
+; ALL-LABEL: i8_2:
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32EL: addu $1, $4, $5
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64EL-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <2 x i8> %a, %b
+ ret <2 x i8> %1
+}
+
+; Test that vectors spilled to the outgoing argument area have the expected
+; offset from $sp.
+
+define <2 x i8> @i8x2_7(<2 x i8> %a, <2 x i8> %b, <2 x i8> %c, <2 x i8> %d,
+ <2 x i8> %e, <2 x i8> %f, <2 x i8> %g) {
+entry:
+
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32EB-DAG: srl ${{[0-9]+}}, $7, 24
+
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $4, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $5, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $6, 65280
+; MIPS32EL-DAG: andi ${{[0-9]+}}, $7, 65280
+
+; MIPS32-DAG: lbu ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 17($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 21($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lbu ${{[0-9]+}}, 25($sp)
+
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $7, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 40($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 41($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 42($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 43($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 44($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 45($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 46($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 47($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 48($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 49($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 50($sp)
+; MIPS32R5-DAG: lbu ${{[0-9]+}}, 51($sp)
+
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $8, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $9, 48
+; MIPS64EB-DAG: dsrl ${{[0-9]+}}, $10, 48
+
+; MIPS64R5-DAG: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $5, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $6, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $7, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $8, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $9, {{[0-9]+}}($sp)
+; MIPS64R5-DAG: sd $10, {{[0-9]+}}($sp)
+
+ %0 = add <2 x i8> %a, %b
+ %1 = add <2 x i8> %0, %c
+ %2 = add <2 x i8> %1, %d
+ %3 = add <2 x i8> %2, %e
+ %4 = add <2 x i8> %3, %f
+ %5 = add <2 x i8> %4, %g
+ ret <2 x i8> %5
+}
+
+define <4 x i8> @i8_4(<4 x i8> %a, <4 x i8> %b) {
+; ALL-LABEL: i8_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+ %1 = add <4 x i8> %a, %b
+ ret <4 x i8> %1
+}
+
+define <8 x i8> @i8_8(<8 x i8> %a, <8 x i8> %b) {
+; ALL-LABEL: i8_8:
+; MIPS32-NOT: lw
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 24
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 16
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R1]], 8
+; MIPS64-DAG: srl ${{[0-9]+}}, $[[R0]], 8
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <8 x i8> %a, %b
+ ret <8 x i8> %1
+}
+
+define <16 x i8> @i8_16(<16 x i8> %a, <16 x i8> %b) {
+; ALL-LABEL: i8_16:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 24
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 8
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 8
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 40
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 56
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <16 x i8> %a, %b
+
+ ret <16 x i8> %1
+}
+
+define <2 x i16> @i16_2(<2 x i16> %a, <2 x i16> %b) {
+; ALL-LABEL: i16_2:
+; MIPS32: addu $[[R0:[0-9]+]], $4, $5
+; MIPS32: andi $[[R1:[0-9]+]], $[[R0]], 65535
+; MIPS32: srl $[[R2:[0-9]+]], $5, 16
+; MIPS32: srl $[[R3:[0-9]+]], $4, 16
+; MIPS32: addu $[[R4:[0-9]+]], $[[R3]], $[[R2]]
+; MIPS32: sll $2, $[[R4]], 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64R5-DAG: sll ${{[0-9]+}}, $5, 0
+
+ %1 = add <2 x i16> %a, %b
+ ret <2 x i16> %1
+}
+
+define <4 x i16> @i16_4(<4 x i16> %a, <4 x i16> %b) {
+; ALL-LABEL: i16_4:
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <4 x i16> %a, %b
+ ret <4 x i16> %1
+}
+
+define <8 x i16> @i16_8(<8 x i16> %a, <8 x i16> %b) {
+; ALL-LABEL: i16_8:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: srl ${{[0-9]+}}, $7, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $6, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $5, 16
+; MIPS32-DAG: srl ${{[0-9]+}}, $4, 16
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 48
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <8 x i16> %a, %b
+ ret <8 x i16> %1
+}
+
+define <2 x i32> @i32_2(<2 x i32> %a, <2 x i32> %b) {
+; ALL-LABEL: i32_2:
+; MIPS32-DAG: addu $2, $4, $6
+; MIPS32-DAG: addu $3, $5, $7
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = add <2 x i32> %a, %b
+
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @i32_4(<4 x i32> %a, <4 x i32> %b) {
+; ALL-LABEL: i32_4:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: sll ${{[0-9]+}}, $7, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $6, 32
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $7, 32
+ %1 = add <4 x i32> %a, %b
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @i64_2(<2 x i64> %a, <2 x i64> %b) {
+; ALL-LABEL: i64_2:
+; MIPS32-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: addu $2
+; MIPS32-DAG: addu $3
+; MIPS32-DAG: addu $4
+; MIPS32-DAG: addu $5
+
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 16($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: lw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $7
+
+; MIPS64-DAG: daddu $2, $4, $6
+; MIPS64-DAG: daddu $3, $5, $7
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = add <2 x i64> %a, %b
+ ret <2 x i64> %1
+}
+
+; The MIPS vector ABI treats vectors of floats differently to vectors of
+; integers.
+
+; For arguments, floating point vectors are bitcast to integer vectors whose
+; elements are of GPR width and whose element count is deduced from the
+; length of the floating point vector divided by the size of the GPRs.
+
+; For returns, integer vectors are passed via the GPR register set, but
+; floating point vectors are returned via a hidden sret pointer.
+
+; For testing purposes we skip returning values here and test them below
+; instead.
+@float_res_v2f32 = external global <2 x float>
+
+define void @float_2(<2 x float> %a, <2 x float> %b) {
+; ALL-LABEL: float_2:
+; MIPS32: mtc1 $7, $f[[F0:[0-9]+]]
+; MIPS32: mtc1 $5, $f[[F1:[0-9]+]]
+; MIPS32: add.s $f[[F2:[0-9]+]], $f[[F1]], $f[[F0]]
+; MIPS32: swc1 $f[[F2]]
+; MIPS32: mtc1 $6, $f[[F3:[0-9]+]]
+; MIPS32: mtc1 $4, $f[[F4:[0-9]+]]
+; MIPS32: add.s $f[[F5:[0-9]+]], $f[[F4]], $f[[F3]]
+; MIPS32: swc1 $f[[F5]]
+
+; MIPS32R5-DAG: sw $4
+; MIPS32R5-DAG: sw $5
+; MIPS32R5-DAG: sw $6
+; MIPS32R5-DAG: sw $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: sd $4
+; MIPS64R5-DAG: sd $5
+
+ %1 = fadd <2 x float> %a, %b
+ store <2 x float> %1, <2 x float> * @float_res_v2f32
+ ret void
+}
+
+@float_res_v4f32 = external global <4 x float>
+
+; For MSA this case is suboptimal; the 4 loads can be combined into a single
+; ld.w.
+
+define void @float_4(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: float_4:
+; MIPS32-DAG: mtc1 $4
+; MIPS32-DAG: mtc1 $5
+; MIPS32-DAG: mtc1 $6
+; MIPS32-DAG: mtc1 $7
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+; MIPS32-DAG: lwc1
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R2:[0-9]+]], $4, 32
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R2]], 0
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $6, 0
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R7]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R8:[0-9]+]], $6, 32
+; MIPS64-DAG: dsrl $[[R9:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R10:[0-9]+]], $[[R8]], 0
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R9]], 0
+; MIPS64-DAG: mtc1 $[[R10]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = fadd <4 x float> %a, %b
+ store <4 x float> %1, <4 x float> * @float_res_v4f32
+ ret void
+}
+
+@double_v2f64 = external global <2 x double>
+
+define void @double_2(<2 x double> %a, <2 x double> %b) {
+; ALL-LABEL: double_2:
+; MIPS32-DAG: sw $7
+; MIPS32-DAG: sw $6
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32: add.d
+; MIPS32-DAG: sw $5
+; MIPS32-DAG: sw $4
+; MIPS32-DAG: ldc1
+; MIPS32-DAG: ldc1
+; MIPS32: add.d
+
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $[[R1]]
+; MIPS32R5-DAG: lw $[[R2:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $[[R2]]
+; MIPS32R5-DAG: lw $[[R3:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R3]]
+; MIPS32R5-DAG: lw $[[R4:[0-9]+]], 28($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R4]]
+
+; MIPS32R5-DAG: insert.w $w[[W1:[0-9]+]][0], $4
+; MIPS32R5-DAG: insert.w $w[[W1]][1], $5
+; MIPS32R5-DAG: insert.w $w[[W1]][2], $6
+; MIPS32R5-DAG: insert.w $w[[W1]][3], $7
+
+; MIPS64-DAG: dmtc1 $6, $f[[R0:[0-9]+]]
+; MIPS64-DAG: dmtc1 $4, $f[[R1:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R2:[0-9]+]], $f[[R1]], $f[[R0]]
+; MIPS64-DAG: dmtc1 $7, $f[[R3:[0-9]+]]
+; MIPS64-DAG: dmtc1 $5, $f[[R4:[0-9]+]]
+; MIPS64-DAG: add.d $f[[R5:[0-9]+]], $f[[R4]], $f[[R3]]
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][{{[0-9]}}], $4
+; MIPS64R5-DAG: insert.d $w[[W0]][{{[0-9]}}], $5
+; MIPS64R5-DAG: insert.d $w[[W1:[0-9]+]][{{[0-9]}}], $6
+; MIPS64R5-DAG: insert.d $w[[W1]][{{[0-9]}}], $7
+
+ %1 = fadd <2 x double> %a, %b
+ store <2 x double> %1, <2 x double> * @double_v2f64
+ ret void
+}
+
+; Return value testing.
+; Integer vectors are returned in $2, $3, $4 and $5 for O32, and in $2 and $3
+; for N32/N64.
+; Floating point vectors are returned through a hidden sret pointer.
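+
+; As an illustrative sketch of the hidden sret convention (the lowered form is
+; written out by hand here), a definition such as
+;
+;   define <2 x float> @ret_float_2() {
+;     %v = load <2 x float>, <2 x float> * @gv2f32
+;     ret <2 x float> %v
+;   }
+;
+; behaves as if it took an out-pointer in $4 and stored the result through it:
+;
+;   define void @ret_float_2(<2 x float>* sret %out) {
+;     %v = load <2 x float>, <2 x float> * @gv2f32
+;     store <2 x float> %v, <2 x float> * %out
+;     ret void
+;   }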
+
+@gv2i8 = global <2 x i8> <i8 1, i8 2>
+@gv4i8 = global <4 x i8> <i8 0, i8 1, i8 2, i8 3>
+@gv8i8 = global <8 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>
+@gv16i8 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>
+
+@gv2i16 = global <2 x i16> <i16 1, i16 2>
+@gv4i16 = global <4 x i16> <i16 0, i16 1, i16 2, i16 3>
+@gv8i16 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
+
+@gv2i32 = global <2 x i32> <i32 0, i32 1>
+@gv4i32 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+
+@gv2i64 = global <2 x i64> <i64 0, i64 1>
+
+define <2 x i8> @ret_2_i8() {
+; ALL-LABEL: ret_2_i8:
+; MIPS32-DAG: lhu $2
+; MIPS32R5-DAG: lhu $2
+
+; FIXME: why is this lh instead of lhu on mips64?
+
+; MIPS64-DAG: lh $2
+; MIPS64R5-DAG: lh $2
+ %1 = load <2 x i8>, <2 x i8> * @gv2i8
+ ret <2 x i8> %1
+}
+
+define <4 x i8> @ret_4_i8() {
+; ALL-LABEL: ret_4_i8:
+; MIPS32-DAG: lw $2
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG: lw $2
+; MIPS64R5-DAG: lw $2
+
+ %1 = load <4 x i8>, <4 x i8> * @gv4i8
+ ret <4 x i8> %1
+}
+
+define <8 x i8> @ret_8_i8() {
+; ALL-LABEL: ret_8_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+ %1 = load <8 x i8>, <8 x i8> * @gv8i8
+ ret <8 x i8> %1
+}
+
+define <16 x i8> @ret_16_i8() {
+; ALL-LABEL: ret_16_i8:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+ %1 = load <16 x i8>, <16 x i8> * @gv16i8
+ ret <16 x i8> %1
+}
+
+define <2 x i16> @ret_2_i16() {
+; ALL-LABEL: ret_2_i16:
+; MIPS32-DAG: lw $2
+
+; MIPS32R5-DAG: lw $2
+
+; MIPS64-DAG: lw $2
+
+; MIPS64R5-DAG: lw $2
+ %1 = load <2 x i16>, <2 x i16> * @gv2i16
+ ret <2 x i16> %1
+}
+
+define <4 x i16> @ret_4_i16() {
+; ALL-LABEL: ret_4_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+ %1 = load <4 x i16>, <4 x i16> * @gv4i16
+ ret <4 x i16> %1
+}
+
+define <8 x i16> @ret_8_i16() {
+; ALL-LABEL: ret_8_i16:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2
+; MIPS64R5-DAG: copy_s.d $3
+
+ %1 = load <8 x i16>, <8 x i16> * @gv8i16
+ ret <8 x i16> %1
+}
+
+define <2 x i32> @ret_2_i32() {
+; ALL-LABEL: ret_2_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]]
+
+; MIPS64-DAG: ld $2
+; MIPS64R5-DAG: ld $2
+
+ %1 = load <2 x i32>, <2 x i32> * @gv2i32
+ ret <2 x i32> %1
+}
+
+define <4 x i32> @ret_4_i32() {
+; ALL-LABEL: ret_4_i32:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+ %1 = load <4 x i32>, <4 x i32> * @gv4i32
+ ret <4 x i32> %1
+}
+
+define <2 x i64> @ret_2_i64() {
+; ALL-LABEL: ret_2_i64:
+; MIPS32-DAG: lw $2
+; MIPS32-DAG: lw $3
+; MIPS32-DAG: lw $4
+; MIPS32-DAG: lw $5
+
+; MIPS32R5-DAG: copy_s.w $2, $w[[W0:[0-9]+]][0]
+; MIPS32R5-DAG: copy_s.w $3, $w[[W0]][1]
+; MIPS32R5-DAG: copy_s.w $4, $w[[W0]][2]
+; MIPS32R5-DAG: copy_s.w $5, $w[[W0]][3]
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w[[W0:[0-9]+]]
+; MIPS64R5-DAG: copy_s.d $3, $w[[W0]]
+
+ %1 = load <2 x i64>, <2 x i64> * @gv2i64
+ ret <2 x i64> %1
+}
+
+@gv2f32 = global <2 x float> <float 0.0, float 0.0>
+@gv4f32 = global <4 x float> <float 0.0, float 0.0, float 0.0, float 0.0>
+
+define <2 x float> @ret_float_2() {
+entry:
+; ALL-LABEL: ret_float_2:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32R5-DAG: swc1 $f{{[0-9]+}}, 4($4)
+
+; MIPS64: ld $2
+
+; MIPS64R5: ld $2
+
+ %0 = load <2 x float>, <2 x float> * @gv2f32
+ ret <2 x float> %0
+}
+
+define <4 x float> @ret_float_4() {
+entry:
+; ALL-LABEL: ret_float_4:
+
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: st.w $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+ %0 = load <4 x float>, <4 x float> * @gv4f32
+ ret <4 x float> %0
+}
+
+@gv2f64 = global <2 x double> <double 0.0, double 0.0>
+
+define <2 x double> @ret_double_2() {
+entry:
+; ALL-LABEL: ret_double_2:
+
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: sdc1 $f{{[0-9]+}}, 0($4)
+
+; MIPS32R5: st.d $w{{[0-9]+}}, 0($4)
+
+; MIPS64-DAG: ld $2
+; MIPS64-DAG: ld $3
+
+; MIPS64R5-DAG: copy_s.d $2, $w{{[0-9]+}}[0]
+; MIPS64R5-DAG: copy_s.d $3, $w{{[0-9]+}}[1]
+
+ %0 = load <2 x double>, <2 x double> * @gv2f64
+ ret <2 x double> %0
+}
+
+; Test argument lowering and call result lowering.
+
+define void @call_i8_2() {
+entry:
+; ALL-LABEL: call_i8_2:
+; MIPS32EB-DAG: addiu $4
+; MIPS32EB-DAG: addiu $5
+; MIPS32-NOT: addiu $6
+; MIPS32-NOT: addiu $7
+
+; MIPS32R5-DAG: lhu $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lhu $5, {{[0-9]+}}($sp)
+
+; MIPS32R5: jal
+; MIPS32R5: sw $2, {{[0-9]+}}($sp)
+
+; MIPS32R5-DAG: sb ${{[0-9]+}}, 1(${{[0-9]+}})
+; MIPS32R5-DAG: sb ${{[0-9]+}}, %lo(gv2i8)(${{[0-9]+}})
+
+; MIPS64EB: daddiu $4, $zero, 1543
+; MIPS64EB: daddiu $5, $zero, 3080
+
+; MIPS64EL: daddiu $4, $zero, 1798
+; MIPS64EL: daddiu $5, $zero, 2060
+
+; MIPS64R5-DAG: lh $4
+; MIPS64R5-DAG: lh $5
+
+; MIPS32: jal i8_2
+; MIPS64: jalr $25
+
+; MIPS32EB-DAG: srl $[[R0:[0-9]+]], $2, 16
+; MIPS32EB-DAG: sb $[[R0]]
+; MIPS32EB-DAG: srl $[[R1:[0-9]+]], $2, 24
+; MIPS32EB-DAG: sb $[[R1]]
+
+; MIPS32EL: sb $2
+; MIPS32EL: srl $[[R0:[0-9]+]], $2, 8
+; MIPS32EL: sb $[[R0]]
+
+; MIPS64EB: dsrl $[[R4:[0-9]+]], $2, 48
+; MIPS64EB: sb $[[R4]]
+; MIPS64EB: dsrl $[[R5:[0-9]+]], $2, 56
+; MIPS64EB: sb $[[R5]]
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $2, 0
+; MIPS64EL: sb $[[R6]]
+; MIPS64EL: srl $[[R7:[0-9]+]], $[[R6]], 8
+; MIPS64EL: sb $[[R7]]
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x i8> @i8_2(<2 x i8> <i8 6, i8 7>, <2 x i8> <i8 12, i8 8>)
+ store <2 x i8> %0, <2 x i8> * @gv2i8
+ ret void
+}
+
+define void @call_i8_4() {
+entry:
+; ALL-LABEL: call_i8_4:
+; MIPS32: ori $4
+; MIPS32: ori $5
+; MIPS32-NOT: ori $6
+; MIPS32-NOT: ori $7
+
+; MIPS32R5-DAG: lw $4, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: lw $5, {{[0-9]+}}($sp)
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5: lw $4
+; MIPS64R5: lw $5
+
+; MIPS32: jal i8_4
+; MIPS64: jalr $25
+
+; MIPS32: sw $2
+
+; MIPS32R5-DAG: sw $2
+
+; MIPS64: sw $2
+; MIPS64R5: sw $2
+
+ %0 = call <4 x i8> @i8_4(<4 x i8> <i8 6, i8 7, i8 9, i8 10>, <4 x i8> <i8 12, i8 8, i8 9, i8 10>)
+ store <4 x i8> %0, <4 x i8> * @gv4i8
+ ret void
+}
+
+define void @call_i8_8() {
+entry:
+; ALL-LABEL: call_i8_8:
+
+; MIPS32: ori $6
+; MIPS32: ori $4
+; MIPS32: move $5
+; MIPS32: move $7
+
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: move $5
+; MIPS32R5-DAG: move $7
+
+; MIPS64EB: daddiu $4, ${{[0-9]+}}, 2314
+; MIPS64EB: daddiu $5, ${{[0-9]+}}, 2314
+
+; MIPS64EL: daddiu $4, ${{[0-9]+}}, 1798
+; MIPS64EL: daddiu $5, ${{[0-9]+}}, 2060
+
+; MIPS32: jal i8_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2
+; MIPS32-DAG: sw $3
+
+; MIPS32R5-DAG: sw $2
+; MIPS32R5-DAG: sw $3
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+ %0 = call <8 x i8> @i8_8(<8 x i8> <i8 6, i8 7, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>, <8 x i8> <i8 12, i8 8, i8 9, i8 10, i8 6, i8 7, i8 9, i8 10>)
+ store <8 x i8> %0, <8 x i8> * @gv8i8
+ ret void
+}
+
+define void @calli8_16() {
+entry:
+; ALL-LABEL: calli8_16:
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: ori $7, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32: move $5, ${{[0-9]+}}
+; MIPS32: move $6, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $7
+
+; MIPS64R5-DAG: copy_s.d $4
+; MIPS64R5-DAG: copy_s.d $5
+; MIPS64R5-DAG: copy_s.d $6
+; MIPS64R5-DAG: copy_s.d $7
+
+; MIPS32: jal i8_16
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv16i8)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+ %0 = call <16 x i8> @i8_16(<16 x i8> <i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7,i8 6, i8 7, i8 6, i8 7, i8 9, i8 10>, <16 x i8> <i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 7, i8 9,i8 12, i8 8, i8 9, i8 10>)
+ store <16 x i8> %0, <16 x i8> * @gv16i8
+ ret void
+}
+
+define void @calli16_2() {
+entry:
+; ALL-LABEL: calli16_2:
+
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+
+; MIPS32R5-DAG: lw $4
+; MIPS32R5-DAG: lw $5
+
+; MIPS64: ori $4
+; MIPS64: ori $5
+
+; MIPS64R5-DAG: lw $4
+; MIPS64R5-DAG: lw $5
+
+; MIPS32: jal i16_2
+; MIPS64: jalr $25
+
+; MIPS32: sw $2, %lo(gv2i16)
+
+; MIPS32R5: sw $2, %lo(gv2i16)
+
+; MIPS64: sw $2
+
+; MIPS64R5: sw $2
+
+ %0 = call <2 x i16> @i16_2(<2 x i16> <i16 6, i16 7>, <2 x i16> <i16 12, i16 8>)
+ store <2 x i16> %0, <2 x i16> * @gv2i16
+ ret void
+}
+
+define void @calli16_4() {
+entry:
+; ALL-LABEL: calli16_4:
+; MIPS32-DAG: ori $4
+; MIPS32-DAG: ori $5
+; MIPS32-DAG: ori $6
+; MIPS32-DAG: move $7
+
+; MIPS32R5-DAG: ori $4
+; MIPS32R5-DAG: ori $5
+; MIPS32R5-DAG: ori $6
+; MIPS32R5-DAG: move $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i16_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32R5-DAG: sw $2, %lo(gv4i16)(${{[0-9]+}})
+
+; MIPS64: sd $2
+; MIPS64R5: sd $2
+
+ %0 = call <4 x i16> @i16_4(<4 x i16> <i16 6, i16 7, i16 9, i16 10>, <4 x i16> <i16 12, i16 8, i16 9, i16 10>)
+ store <4 x i16> %0, <4 x i16> * @gv4i16
+ ret void
+}
+
+define void @calli16_8() {
+entry:
+; ALL-LABEL: calli16_8:
+
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: ori $4, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: ori $5, ${{[0-9]+}}, {{[0-9]+}}
+; MIPS32-DAG: move $6, ${{[0-9]+}}
+; MIPS32-DAG: move $7, ${{[0-9]+}}
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $7
+; MIPS64-DAG: move $5
+; MIPS64-DAG: move $6
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i16_8
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv8i16)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64: sd $3
+; MIPS64: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W2:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W2]][1], $3
+
+ %0 = call <8 x i16> @i16_8(<8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 6, i16 7, i16 9, i16 10>, <8 x i16> <i16 6, i16 7, i16 9, i16 10, i16 12, i16 8, i16 9, i16 10>)
+ store <8 x i16> %0, <8 x i16> * @gv8i16
+ ret void
+}
+
+define void @calli32_2() {
+entry:
+; ALL-LABEL: calli32_2:
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+
+; MIPS64R5-DAG: ld $4
+; MIPS64R5-DAG: ld $5
+
+; MIPS32: jal i32_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS32R5-DAG: sw $2, %lo(gv2i32)(${{[0-9]+}})
+; MIPS32R5-DAG: sw $3, 4(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x i32> @i32_2(<2 x i32> <i32 6, i32 7>, <2 x i32> <i32 12, i32 8>)
+ store <2 x i32> %0, <2 x i32> * @gv2i32
+ ret void
+}
+
+define void @calli32_4() {
+entry:
+; ALL-LABEL: calli32_4:
+
+; MIPS32-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS32R5-DAG: addiu $4
+; MIPS32R5-DAG: addiu $5
+; MIPS32R5-DAG: addiu $6
+; MIPS32R5-DAG: addiu $7
+
+; MIPS64-DAG: daddiu $4
+; MIPS64-DAG: daddiu $6
+; MIPS64-DAG: daddiu $5
+; MIPS64-DAG: move $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w[[W0:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $5, $w[[W0]][1]
+; MIPS64R5-DAG: copy_s.d $6, $w[[W1:[0-9]+]][0]
+; MIPS64R5-DAG: copy_s.d $7, $w[[W1]][1]
+
+; MIPS32: jal i32_4
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv4i32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+ %0 = call <4 x i32> @i32_4(<4 x i32> <i32 6, i32 7, i32 9, i32 10>, <4 x i32> <i32 12, i32 8, i32 9, i32 10>)
+ store <4 x i32> %0, <4 x i32> * @gv4i32
+ ret void
+}
+
+define void @calli64_2() {
+entry:
+; ALL-LABEL: calli64_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4
+; MIPS32-DAG: addiu $5
+; MIPS32-DAG: addiu $6
+; MIPS32-DAG: addiu $7
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64: daddiu $4
+; MIPS64: daddiu $5
+; MIPS64: daddiu $6
+; MIPS64: daddiu $7
+
+; MIPS64R5: daddiu $4
+; MIPS64R5: daddiu $5
+; MIPS64R5: daddiu $6
+; MIPS64R5: daddiu $7
+
+; MIPS32: jal i64_2
+; MIPS64: jalr $25
+
+; MIPS32-DAG: sw $5, 12(${{[0-9]+}})
+; MIPS32-DAG: sw $4, 8(${{[0-9]+}})
+; MIPS32-DAG: sw $3, 4(${{[0-9]+}})
+; MIPS32-DAG: sw $2, %lo(gv2i64)(${{[0-9]+}})
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $2
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $3
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $4
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $5
+; MIPS32R5-DAG: st.w $w[[W0]]
+
+; MIPS64-DAG: sd $3
+; MIPS64-DAG: sd $2
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+ %0 = call <2 x i64> @i64_2(<2 x i64> <i64 6, i64 7>, <2 x i64> <i64 12, i64 8>)
+ store <2 x i64> %0, <2 x i64> * @gv2i64
+ ret void
+}
+
+declare <2 x float> @float2_extern(<2 x float>, <2 x float>)
+declare <4 x float> @float4_extern(<4 x float>, <4 x float>)
+declare <2 x double> @double2_extern(<2 x double>, <2 x double>)
+
+define void @callfloat_2() {
+entry:
+; ALL-LABEL: callfloat_2:
+
+; MIPS32-DAG: addiu $4, $sp, 24
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: lui $7
+
+; MIPS32R5-DAG: addiu $4, $sp, 24
+; MIPS32R5-DAG: addiu $6, $zero, 0
+; MIPS32R5-DAG: lui $7
+
+; MIPS64: dsll $4
+; MIPS64: dsll $5
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+
+; MIPS32: jal float2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS32R5-DAG: lwc1 $f[[F0:[0-9]+]], 24($sp)
+; MIPS32R5-DAG: lwc1 $f[[F1:[0-9]+]], 28($sp)
+
+; MIPS32R5-DAG: swc1 $f[[F1]], 4(${{[0-9]+}})
+; MIPS32R5-DAG: swc1 $f[[F0]], %lo(gv2f32)(${{[0-9]+}})
+
+; MIPS64: sd $2
+
+; MIPS64R5: sd $2
+
+ %0 = call <2 x float> @float2_extern(<2 x float> <float 0.0, float -1.0>, <2 x float> <float 12.0, float 14.0>)
+ store <2 x float> %0, <2 x float> * @gv2f32
+ ret void
+}
+
+define void @callfloat_4() {
+entry:
+; ALL-LABEL: callfloat_4:
+
+; MIPS32: sw ${{[0-9]+}}, 36($sp)
+; MIPS32: sw ${{[0-9]+}}, 32($sp)
+; MIPS32: sw ${{[0-9]+}}, 28($sp)
+; MIPS32: sw ${{[0-9]+}}, 24($sp)
+; MIPS32: sw ${{[0-9]+}}, 20($sp)
+; MIPS32: sw ${{[0-9]+}}, 16($sp)
+; MIPS32: addiu $4, $sp, 48
+; MIPS32: addiu $6, $zero, 0
+; MIPS32: lui $7
+
+; MIPS32R5: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5: copy_s.w $7, $w{{[0-9]+}}
+; MIPS32R5: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5: sw ${{[0-9]+}}, 16($sp)
+; MIPS32R5: addiu $4, $sp, 48
+
+; MIPS64-DAG: dsll $4
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS64: jalr $25
+; MIPS32: jal
+
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 52($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 56($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 60($sp)
+
+; MIPS32R5: ld.w $w{{[0-9]+}}, 48($sp)
+
+; MIPS64-DAG: $2
+; MIPS64-DAG: $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+ %0 = call <4 x float> @float4_extern(<4 x float> <float 0.0, float -1.0, float 2.0, float 4.0>, <4 x float> <float 12.0, float 14.0, float 15.0, float 16.0>)
+ store <4 x float> %0, <4 x float> * @gv4f32
+ ret void
+}
+
+define void @calldouble_2() {
+entry:
+; ALL-LABEL: calldouble_2:
+
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 36($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 32($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 28($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 24($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 20($sp)
+; MIPS32-DAG: sw ${{[0-9a-z]+}}, 16($sp)
+
+; MIPS32-DAG: addiu $4, $sp, [[R0:[0-9]+]]
+; MIPS32-DAG: addiu $6, $zero, 0
+; MIPS32-DAG: addiu $7, $zero, 0
+
+; MIPS32R5-DAG: copy_s.w $4, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $5, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $6, $w{{[0-9]+}}
+; MIPS32R5-DAG: copy_s.w $7, $w{{[0-9]+}}
+
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 36($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 32($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 28($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 24($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 20($sp)
+; MIPS32R5-DAG: sw ${{[0-9]+}}, 16($sp)
+
+; MIPS64-DAG: dsll $5
+; MIPS64-DAG: dsll $6
+; MIPS64-DAG: dsll $7
+; MIPS64-DAG: daddiu $4
+
+; MIPS64R5-DAG: copy_s.d $4, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $5, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $6, $w{{[0-9]+}}
+; MIPS64R5-DAG: copy_s.d $7, $w{{[0-9]+}}
+
+; MIPS32: jal double2_extern
+; MIPS64: jalr $25
+
+; MIPS32-DAG: ldc1 $f[[F0:[0-9]+]], 48($sp)
+; MIPS32-DAG: ldc1 $f[[F1:[0-9]+]], 56($sp)
+
+; MIPS32-DAG: sdc1 $f[[F1]], 8(${{[0-9]+}})
+; MIPS32-DAG: sdc1 $f[[F0]], %lo(gv2f64)(${{[0-9]+}})
+
+; MIPS32R5: ld.d $w[[W0:[0-9]+]], 48($sp)
+; MIPS32R5: st.d $w[[W0]], 0(${{[0-9]+}})
+
+; MIPS64-DAG: sd $2
+; MIPS64-DAG: sd $3
+
+; MIPS64R5-DAG: insert.d $w[[W0:[0-9]+]][0], $2
+; MIPS64R5-DAG: insert.d $w[[W0]][1], $3
+
+ %0 = call <2 x double> @double2_extern(<2 x double> <double 0.0, double -1.0>, <2 x double> <double 12.0, double 14.0>)
+ store <2 x double> %0, <2 x double> * @gv2f64
+ ret void
+}
+
+; The mixed tests show that, due to alignment requirements, $5 is not used
+; in argument passing.
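+; (A rough worked example, assuming the O32 rules exercised here: in mixed_32
+; below the hidden sret pointer occupies $4, and the <4 x float> argument is
+; passed as an 8-byte aligned integer vector, so $5 is skipped and the vector
+; starts in $6/$7 with the remainder going to the stack.)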
+
+define float @mixed_i8(<2 x float> %a, i8 %b, <2 x float> %c) {
+entry:
+; ALL-LABEL: mixed_i8:
+
+; MIPS32-DAG: mtc1 $5, $f{{[0-9]+}}
+; MIPS32: andi $[[R7:[0-9]+]], $6, 255
+; MIPS32: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS32: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS32-DAG: mtc1 $4, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 16($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 20($sp)
+; MIPS32-DAG: add.s $f0, $f{{[0-9]+}}, $f{{[0-9]+}}
+
+; MIPS32R5: andi $[[R0:[0-9]+]], $6, 255
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5: sw $[[R0]], {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $5, {{[0-9]+}}($sp)
+; MIPS32R5-DAG: sw $4, {{[0-9]+}}($sp)
+
+; MIPS64EB-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EB-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EB: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EB: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EB: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EB: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EB-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EB-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EB-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EB-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EB-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64EB-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EB-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EB-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: dsrl $[[R1:[0-9]+]], $4, 32
+; MIPS64EL-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64EL-DAG: mtc1 $[[R2:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64EL: andi $[[R7:[0-9]+]], $[[R6]], 255
+; MIPS64EL: mtc1 $[[R7]], $f[[F0:[0-9]+]]
+; MIPS64EL: cvt.s.w $f{{[0-9]+}}, $f[[F0]]
+
+; MIPS64EL-DAG: dsrl $[[R4:[0-9]+]], $6, 32
+; MIPS64EL-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64EL-DAG: mtc1 $[[R5:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64EL-DAG: sll $[[R0:[0-9]+]], $4, 0
+; MIPS64EL-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64EL-DAG: sll $[[R3:[0-9]+]], $6, 0
+; MIPS64EL-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+
+; MIPS64R5: sll $[[R0:[0-9]+]], $5, 0
+; MIPS64R5: andi $[[R1:[0-9]+]], $[[R0]], 255
+; MIPS64R5: sd $4, {{[0-9]+}}($sp)
+; MIPS64R5: sd $6, {{[0-9]+}}($sp)
+
+ %0 = zext i8 %b to i32
+ %1 = uitofp i32 %0 to float
+ %2 = insertelement <2 x float> undef, float %1, i32 0
+ %3 = insertelement <2 x float> %2, float %1, i32 1
+ %4 = fadd <2 x float> %3, %a
+ %5 = fadd <2 x float> %4, %c
+ %6 = extractelement <2 x float> %5, i32 0
+ %7 = extractelement <2 x float> %5, i32 1
+ %8 = fadd float %6, %7
+ ret float %8
+}
+
+define <4 x float> @mixed_32(<4 x float> %a, i32 %b) {
+entry:
+; ALL-LABEL: mixed_32:
+
+; MIPS32-DAG: mtc1 $6, $f{{[0-9]+}}
+; MIPS32-DAG: mtc1 $7, $f{{[0-9]+}}
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 28($sp)
+; MIPS32-DAG: lwc1 $f{{[0-9]+}}, 24($sp)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 0($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 4($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 8($4)
+; MIPS32-DAG: swc1 $f{{[0-9]+}}, 12($4)
+
+; MIPS32R5: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5: insert.w $w[[W0]][1], $7
+; MIPS32R5: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5: lw $[[R0:[0-9]+]], 24($sp)
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $6, 0
+; MIPS64-DAG: dsrl $[[R0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R1:[0-9]+]], $[[R0]], 0
+; MIPS64-DAG: mtc1 $[[R1]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $4, 0
+; MIPS64-DAG: dsrl $[[R3:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R4:[0-9]+]], $[[R3]], 0
+; MIPS64-DAG: mtc1 $[[R4]], $f{{[0-9]+}}
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $5, 0
+; MIPS64-DAG: mtc1 $[[R6:[0-9]+]], $f{{[0-9]+}}
+
+; MIPS64R5: insert.d $w[[W0:[0-9]+]][0], $4
+; MIPS64R5: insert.d $w[[W0]][1], $5
+; MIPS64R5: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64R5: fill.w $w{{[0-9]+}}, $[[R0]]
+
+ %0 = uitofp i32 %b to float
+ %1 = insertelement <4 x float> undef, float %0, i32 0
+ %2 = insertelement <4 x float> %1, float %0, i32 1
+ %3 = insertelement <4 x float> %2, float %0, i32 2
+ %4 = insertelement <4 x float> %3, float %0, i32 3
+ %5 = fadd <4 x float> %4, %a
+ ret <4 x float> %5
+}
+
+
+; This test is slightly more fragile than I'd like, as the offset into the
+; outgoing arguments area depends on the size of this function's stack frame.
+
+define <4 x float> @cast(<4 x i32> %a) {
+entry:
+; ALL-LABEL: cast:
+
+; MIPS32: addiu $sp, $sp, -32
+; MIPS32-DAG: sw $6, {{[0-9]+}}($sp)
+; MIPS32-DAG: sw $7, {{[0-9]+}}($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 48($sp)
+; MIPS32-DAG: lw ${{[0-9]+}}, 52($sp)
+
+; MIPS32R5-DAG: insert.w $w0[0], $6
+; MIPS32R5-DAG: insert.w $w0[1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: insert.w $w0[2], $[[R0]]
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w0[3], $[[R1]]
+
+; MIPS64-DAG: sll ${{[0-9]+}}, $4, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $4, 32
+; MIPS64-DAG: sll ${{[0-9]+}}, $5, 0
+; MIPS64-DAG: dsrl ${{[0-9]+}}, $5, 32
+
+; MIPS64R5-DAG: insert.d $w0[0], $4
+; MIPS64R5-DAG: insert.d $w0[1], $5
+
+ %0 = uitofp <4 x i32> %a to <4 x float>
+ ret <4 x float> %0
+}
+
+define <4 x float> @select(<4 x i32> %cond, <4 x float> %arg1, <4 x float> %arg2) {
+entry:
+; ALL-LABEL: select:
+
+; MIPS32-DAG: andi ${{[0-9]+}}, $7, 1
+; MIPS32-DAG: andi ${{[0-9]+}}, $6, 1
+; MIPS32-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R0]], 1
+; MIPS32-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32-DAG: andi ${{[0-9]+}}, $[[R1]], 1
+
+; MIPS32R5-DAG: insert.w $w[[W0:[0-9]+]][0], $6
+; MIPS32R5-DAG: insert.w $w[[W0]][1], $7
+; MIPS32R5-DAG: lw $[[R0:[0-9]+]], 16($sp)
+; MIPS32R5-DAG: lw $[[R1:[0-9]+]], 20($sp)
+; MIPS32R5-DAG: insert.w $w[[W0]][2], $[[R0]]
+; MIPS32R5-DAG: insert.w $w[[W0]][3], $[[R1]]
+; MIPS32R5-DAG: slli.w $w{{[0-9]}}, $w[[W0]]
+
+; MIPS64-DAG: sll $[[R0:[0-9]+]], $6, 0
+; MIPS64-DAG: mtc1 $[[R0]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R1:[0-9]+]], $6, 32
+; MIPS64-DAG: sll $[[R2:[0-9]+]], $[[R1]], 0
+; MIPS64-DAG: mtc1 $[[R2]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R3:[0-9]+]], $7, 0
+; MIPS64-DAG: mtc1 $[[R3]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R4:[0-9]+]], $7, 32
+; MIPS64-DAG: sll $[[R5:[0-9]+]], $[[R4]], 0
+; MIPS64-DAG: mtc1 $[[R5]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R6:[0-9]+]], $8, 0
+; MIPS64-DAG: mtc1 $[[R6]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R7:[0-9]+]], $8, 32
+; MIPS64-DAG: sll $[[R8:[0-9]+]], $[[R7]], 0
+; MIPS64-DAG: mtc1 $[[R8]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R9:[0-9]+]], $9, 0
+; MIPS64-DAG: mtc1 $[[R9]], $f{{[0-9]+}}
+; MIPS64-DAG: dsrl $[[R10:[0-9]+]], $9, 32
+; MIPS64-DAG: sll $[[R11:[0-9]+]], $[[R10]], 0
+; MIPS64-DAG: mtc1 $[[R11]], $f{{[0-9]+}}
+
+; MIPS64-DAG: sll $[[R12:[0-9]+]], $4, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R12]], 1
+; MIPS64-DAG: dsrl $[[R13:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R14:[0-9]+]], $[[R13]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R14]], 1
+
+; MIPS64-DAG: sll $[[R15:[0-9]+]], $5, 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R15]], 1
+; MIPS64-DAG: dsrl $[[R16:[0-9]+]], $5, 32
+; MIPS64-DAG: sll $[[R17:[0-9]+]], $[[R16]], 0
+; MIPS64-DAG: andi ${{[0-9]+}}, $[[R17]], 1
+
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $8
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $9
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $6
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $7
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[0], $4
+; MIPS64R5-DAG: insert.d $w{{[0-9]+}}[1], $5
+
+ %cond.t = trunc <4 x i32> %cond to <4 x i1>
+ %res = select <4 x i1> %cond.t, <4 x float> %arg1, <4 x float> %arg2
+ ret <4 x float> %res
+}
diff --git a/llvm/test/CodeGen/Mips/ctlz-v.ll b/llvm/test/CodeGen/Mips/ctlz-v.ll
index 3d580e5771f..156c640681b 100644
--- a/llvm/test/CodeGen/Mips/ctlz-v.ll
+++ b/llvm/test/CodeGen/Mips/ctlz-v.ll
@@ -8,10 +8,14 @@ entry:
; MIPS32: clz $2, $4
; MIPS32: clz $3, $5
-; MIPS64-DAG: sll $[[A0:[0-9]+]], $4, 0
-; MIPS64-DAG: clz $2, $[[A0]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: clz $3, $[[A1]]
+; MIPS64-DAG: dsrl $[[A0:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[A1:[0-9]+]], $[[A0]], 0
+; MIPS64-DAG: clz $[[R0:[0-9]+]], $[[A1]]
+; MIPS64-DAG: dsll $[[R1:[0-9]+]], $[[R0]], 32
+; MIPS64-DAG: sll $[[A2:[0-9]+]], $4, 0
+; MIPS64-DAG: clz $[[R2:[0-9]+]], $[[A2]]
+; MIPS64-DAG: dext $[[R3:[0-9]+]], $[[R2]], 0, 32
+; MIPS64-DAG: or $2, $[[R3]], $[[R1]]
%ret = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %x, i1 true)
ret <2 x i32> %ret
diff --git a/llvm/test/CodeGen/Mips/cttz-v.ll b/llvm/test/CodeGen/Mips/cttz-v.ll
index 85f69f9a17d..dbcde7f5fe5 100644
--- a/llvm/test/CodeGen/Mips/cttz-v.ll
+++ b/llvm/test/CodeGen/Mips/cttz-v.ll
@@ -24,14 +24,17 @@ entry:
; MIPS64-DAG: and $[[R2:[0-9]+]], $[[R1]], $[[R0]]
; MIPS64-DAG: clz $[[R3:[0-9]+]], $[[R2]]
; MIPS64-DAG: addiu $[[R4:[0-9]+]], $zero, 32
-; MIPS64-DAG: subu $2, $[[R4]], $[[R3]]
-; MIPS64-DAG: sll $[[A1:[0-9]+]], $5, 0
-; MIPS64-DAG: addiu $[[R5:[0-9]+]], $[[A1]], -1
-; MIPS64-DAG: not $[[R6:[0-9]+]], $[[A1]]
-; MIPS64-DAG: and $[[R7:[0-9]+]], $[[R6]], $[[R5]]
-; MIPS64-DAG: clz $[[R8:[0-9]+]], $[[R7]]
-; MIPS64-DAG: jr $ra
-; MIPS64-DAG: subu $3, $[[R4]], $[[R8]]
+; MIPS64-DAG: subu $[[R5:[0-9]+]], $[[R4]], $[[R3]]
+; MIPS64-DAG: dsrl $[[R6:[0-9]+]], $4, 32
+; MIPS64-DAG: sll $[[R7:[0-9]+]], $[[R6]], 0
+; MIPS64-DAG: dext $[[R8:[0-9]+]], $[[R5]], 0, 32
+; MIPS64-DAG: addiu $[[R9:[0-9]+]], $[[R7]], -1
+; MIPS64-DAG: not $[[R10:[0-9]+]], $[[R7]]
+; MIPS64-DAG: and $[[R11:[0-9]+]], $[[R10]], $[[R9]]
+; MIPS64-DAG: clz $[[R12:[0-9]+]], $[[R11]]
+; MIPS64-DAG: subu $[[R13:[0-9]+]], $[[R4]], $[[R12]]
+; MIPS64-DAG: dsll $[[R14:[0-9]+]], $[[R13]], 32
+; MIPS64-DAG: or $2, $[[R8]], $[[R14]]
%ret = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %x, i1 true)
ret <2 x i32> %ret
diff --git a/llvm/test/CodeGen/Mips/return-vector.ll b/llvm/test/CodeGen/Mips/return-vector.ll
index 08eddf37009..c59695d1873 100644
--- a/llvm/test/CodeGen/Mips/return-vector.ll
+++ b/llvm/test/CodeGen/Mips/return-vector.ll
@@ -128,8 +128,11 @@ entry:
; CHECK-LABEL: call_f2:
; CHECK: call16(f2)
-; CHECK-NOT: lwc1
-; CHECK: add.s $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
+; CHECK: addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG: lwc1 $f[[F0:[0-9]]], [[O0]]($sp)
+; CHECK-DAG: lwc1 $f[[F1:[0-9]]], 20($sp)
+; CHECK: add.s $f0, $f[[F0]], $f[[F1]]
+
}
@@ -143,11 +146,12 @@ entry:
; CHECK-LABEL: call_d2:
; CHECK: call16(d2)
-; CHECK-NOT: ldc1
-; CHECK: add.d $[[R2:[a-z0-9]+]], $[[R0:[a-z0-9]+]], $[[R1:[a-z0-9]+]]
-}
-
+; CHECK: addiu $4, $sp, [[O0:[0-9]+]]
+; CHECK-DAG: ldc1 $f[[F0:[0-9]+]], 24($sp)
+; CHECK-DAG: ldc1 $f[[F1:[0-9]+]], [[O0]]($sp)
+; CHECK: add.d $f0, $f[[F1]], $f[[F0]]
+}
; Check that function returns vector on stack in cases when vector can't be
; returned in registers. Also check that vector is placed on stack starting
@@ -179,11 +183,12 @@ entry:
ret <4 x float> %vecins4
; CHECK-LABEL: return_f4:
-; CHECK-DAG: lwc1 $[[R0:[a-z0-9]+]], 16($sp)
-; CHECK-DAG: swc1 $[[R0]], 12($4)
+; CHECK-DAG: lwc1 $f[[R0:[0-9]+]], 16($sp)
+; CHECK-DAG: swc1 $f[[R0]], 12($4)
; CHECK-DAG: sw $7, 8($4)
; CHECK-DAG: sw $6, 4($4)
; CHECK-DAG: sw $5, 0($4)
+
}
@@ -227,8 +232,8 @@ entry:
ret <2 x float> %vecins2
; CHECK-LABEL: return_f2:
-; CHECK: mov.s $f0, $f12
-; CHECK: mov.s $f2, $f14
+; CHECK-DAG: sw $5, 0($4)
+; CHECK-DAG: sw $6, 4($4)
}
@@ -239,6 +244,10 @@ entry:
ret <2 x double> %vecins2
; CHECK-LABEL: return_d2:
-; CHECK: mov.d $f0, $f12
-; CHECK: mov.d $f2, $f14
+; CHECK-DAG: ldc1 $f[[F0:[0-9]]], 16($sp)
+; CHECK-DAG: sdc1 $f[[F0]], 8($4)
+; CHECK-DAG: mtc1 $6, $f[[F1:[0-9]+]]
+; CHECK-DAG: mtc1 $7, $f
+; CHECK-DAG: sdc1 $f[[F1]], 0($4)
+
}