Diffstat (limited to 'llvm/lib'): 29 files changed, 379 insertions, 82 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index dc1e8d3d241..86e3020d6d2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -9867,6 +9867,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp index 5a5f0cd63ac..7c6860eb26c 100644 --- a/llvm/lib/CodeGen/TargetLoweringBase.cpp +++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) { setLibcallName(RTLIB::BZERO, "__bzero"); break; case Triple::aarch64: + case Triple::aarch64_32: setLibcallName(RTLIB::BZERO, "bzero"); break; default: diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 653f30a12a2..68a14453e76 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -155,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx, break; case Triple::aarch64: case Triple::aarch64_be: + case Triple::aarch64_32: // The small model guarantees static code/data size < 4GB, but not where it // will be in memory. Most of these could end up >2GB away so even a signed // pc-relative 32-bit address is insufficient, theoretically. 
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index d59a7a6e29f..0295db7633d 100644 --- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -120,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES, return make_error<StringError>( std::string("No callback manager available for ") + T.str(), inconvertibleErrorCode()); - case Triple::aarch64: { + case Triple::aarch64: + case Triple::aarch64_32: { typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT; return CCMgrT::Create(ES, ErrorHandlerAddress); } @@ -168,6 +169,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) { }; case Triple::aarch64: + case Triple::aarch64_32: return [](){ return std::make_unique< orc::LocalIndirectStubsManager<orc::OrcAArch64>>(); diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp index 863698eb725..93aabd817d6 100644 --- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp +++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp @@ -90,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES, inconvertibleErrorCode()); case Triple::aarch64: + case Triple::aarch64_32: return LocalLazyCallThroughManager::Create<OrcAArch64>(ES, ErrorHandlerAddr); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 4b328624ccd..20dccd84803 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -919,7 +919,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr, unsigned AbiVariant) { - if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) { + if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be || + Arch == Triple::aarch64_32) { // This stub has to be able to access the full address space, // since symbol lookup won't necessarily find a handy, in-range, // PLT stub for functions which could be anywhere. 
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index a6a818601c6..9ca76602ea1 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -357,6 +357,8 @@ RuntimeDyldMachO::create(Triple::ArchType Arch, return std::make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver); case Triple::aarch64: return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver); + case Triple::aarch64_32: + return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver); case Triple::x86: return std::make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver); case Triple::x86_64: diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp index bd03184b03c..199c6994992 100644 --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() { MCpu = "core2"; else if (Triple.getArch() == llvm::Triple::x86) MCpu = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) MCpu = "cyclone"; } diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index 7ffe7bf84ba..587b332e706 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options, CPU = "core2"; else if (Triple.getArch() == llvm::Triple::x86) CPU = "yonah"; - else if (Triple.getArch() == llvm::Triple::aarch64) + else if (Triple.getArch() == llvm::Triple::aarch64 || + Triple.getArch() == llvm::Triple::aarch64_32) CPU = "cyclone"; } diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp index 6c8f827d59a..02c0d41d132 100644 --- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp +++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp @@ -489,7 +489,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder, TMBuilder.MCpu = "core2"; else if (TheTriple.getArch() == llvm::Triple::x86) TMBuilder.MCpu = "yonah"; - else if (TheTriple.getArch() == llvm::Triple::aarch64) + else if (TheTriple.getArch() == llvm::Triple::aarch64 || + TheTriple.getArch() == llvm::Triple::aarch64_32) TMBuilder.MCpu = "cyclone"; } TMBuilder.TheTriple = std::move(TheTriple); diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp index 861e5013b6b..70c0409ece7 100644 --- a/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/llvm/lib/MC/MCObjectFileInfo.cpp @@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) { return false; // aarch64 always has it. - if (T.getArch() == Triple::aarch64) + if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) return true; // armv7k always has it. 
@@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT, SectionKind::getReadOnly()); - if (T.isOSDarwin() && T.getArch() == Triple::aarch64) + if (T.isOSDarwin() && + (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)) SupportsCompactUnwindWithoutEHFrame = true; if (T.isWatchABI()) @@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) { if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF - else if (T.getArch() == Triple::aarch64) + else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32) CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb) CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index e2c9a43b84b..21048998edf 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1203,4 +1203,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() { RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget()); RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget()); RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target()); + RegisterAsmPrinter<AArch64AsmPrinter> W(getTheARM64_32Target()); + RegisterAsmPrinter<AArch64AsmPrinter> V(getTheAArch64_32Target()); } diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp index 1ca89ba33a5..c5d4b183b69 100644 --- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp @@ -379,14 +379,16 @@ bool AArch64CallLowering::lowerFormalArguments( return false; if (F.isVarArg()) { - if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) { - // FIXME: we need to reimplement saveVarArgsRegisters from + auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + if (!Subtarget.isTargetDarwin()) { + // FIXME: we need to reimplement saveVarArgsRegisters from // AArch64ISelLowering. return false; } - // We currently pass all varargs at 8-byte alignment. - uint64_t StackOffset = alignTo(Handler.StackUsed, 8); + // We currently pass all varargs at 8-byte alignment, or 4 in ILP32. + uint64_t StackOffset = + alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8); auto &MFI = MIRBuilder.getMF().getFrameInfo(); AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp index 02538a18761..39f42562d54 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp @@ -79,10 +79,14 @@ static bool CC_AArch64_Custom_Stack_Block( static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State) { + const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( + State.getMachineFunction().getSubtarget()); + bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO(); + // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. 
ArrayRef<MCPhysReg> RegList; - if (LocVT.SimpleTy == MVT::i64) + if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32)) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) RegList = HRegList; @@ -107,8 +111,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, if (!ArgFlags.isInConsecutiveRegsLast()) return true; - unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size()); - if (RegResult) { + // [N x i32] arguments get packed into x-registers on Darwin's arm64_32 + // because that's how the armv7k Clang front-end emits small structs. + unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1; + unsigned RegResult = State.AllocateRegBlock( + RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg); + if (RegResult && EltsPerReg == 1) { for (auto &It : PendingMembers) { It.convertToReg(RegResult); State.addLoc(It); @@ -116,14 +124,26 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, } PendingMembers.clear(); return true; + } else if (RegResult) { + assert(EltsPerReg == 2 && "unexpected ABI"); + bool UseHigh = false; + CCValAssign::LocInfo Info; + for (auto &It : PendingMembers) { + Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt; + State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult, + MVT::i64, Info)); + UseHigh = !UseHigh; + if (!UseHigh) + ++RegResult; + } + PendingMembers.clear(); + return true; } // Mark all regs in the class as unavailable for (auto Reg : RegList) State.AllocateReg(Reg); - const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>( - State.getMachineFunction().getSubtarget()); unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8; return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h index 13cc0c583fd..5a55d090d7c 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.h +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); +bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State); bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 95d3e4d1e2e..bccbbd4591e 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -17,6 +17,10 @@ class CCIfAlign<string Align, CCAction A> : class CCIfBigEndian<CCAction A> : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>; +class CCIfILP32<CCAction A> : + CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>; + + //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -123,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[ CCIfType<[v2f32], CCBitConvertToType<v2i32>>, CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>, + 
CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>, CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>, // Big endian vectors must be passed as if they were 1-element vectors so that @@ -221,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + + // Re-demote pointers to 32-bits so we don't end up storing 64-bit + // values and clobbering neighbouring stack locations. Not very pretty. + CCIfPtr<CCIfILP32<CCTruncToType<i32>>>, + CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>, + CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16], CCAssignToStack<8, 8>>, CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], @@ -248,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ CCAssignToStack<16, 16>> ]>; +// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the +// same as the normal Darwin VarArgs handling. +let Entry = 1 in +def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[ + CCIfType<[v2f32], CCBitConvertToType<v2i32>>, + CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>, + + // Handle all scalar types as either i32 or f32. + CCIfType<[i8, i16], CCPromoteToType<i32>>, + CCIfType<[f16], CCPromoteToType<f32>>, + + // Everything is on the stack. + // i128 is split to two i64s, and its stack alignment is 16 bytes. + CCIfPtr<CCIfILP32<CCTruncToType<i32>>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, + CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>, + CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16], + CCAssignToStack<8, 8>>, + CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16], + CCAssignToStack<16, 16>> +]>; + + // The WebKit_JS calling convention only passes the first argument (the callee) // in register and the remaining arguments on stack. We allow 32bit stack slots, // so that WebKit can write partial values in the stack and define the other diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index 9f324b43320..35e6fef2436 100644 --- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -103,6 +103,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) { case AArch64::ADDXri: return canAddBePartOfLOH(MI); case AArch64::LDRXui: + case AArch64::LDRWui: // Check immediate to see if the immediate is an address. 
switch (MI.getOperand(2).getType()) { default: @@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO, Info.Type = MCLOH_AdrpAdd; Info.IsCandidate = true; Info.MI0 = &MI; - } else if (MI.getOpcode() == AArch64::LDRXui && + } else if ((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) { Info.Type = MCLOH_AdrpLdrGot; Info.IsCandidate = true; @@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo, return true; } } else { - assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui"); + assert((MI.getOpcode() == AArch64::LDRXui || + MI.getOpcode() == AArch64::LDRWui) && + "Expect LDRXui or LDRWui"); assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) && "Expected GOT relocation"); if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) { @@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) { handleClobber(LOHInfos[Idx]); } // Handle uses. + + SmallSet<int, 4> UsesSeen; for (const MachineOperand &MO : MI.uses()) { if (!MO.isReg() || !MO.readsReg()) continue; int Idx = mapRegToGPRIndex(MO.getReg()); if (Idx < 0) continue; - handleUse(MI, MO, LOHInfos[Idx]); + + // Multiple uses of the same register within a single instruction don't + // count as MultiUser or block optimization. This is especially important on + // arm64_32, where any memory operation is likely to be an explicit use of + // xN and an implicit use of wN (the base address register). + if (!UsesSeen.count(Idx)) { + handleUse(MI, MO, LOHInfos[Idx]); + UsesSeen.insert(Idx); + } } } @@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { switch (Opcode) { case AArch64::ADDXri: case AArch64::LDRXui: + case AArch64::LDRWui: if (canDefBePartOfLOH(MI)) { const MachineOperand &Def = MI.getOperand(0); const MachineOperand &Op = MI.getOperand(1); diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 2d917dcbffb..4c845ed6f82 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -495,12 +495,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, } } else { // Small codemodel expand into ADRP + LDR. 
+ MachineFunction &MF = *MI.getParent()->getParent(); + DebugLoc DL = MI.getDebugLoc(); MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg); - MachineInstrBuilder MIB2 = - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui)) - .add(MI.getOperand(0)) - .addReg(DstReg); + + MachineInstrBuilder MIB2; + if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) { + auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32); + unsigned DstFlags = MI.getOperand(0).getTargetFlags(); + MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addReg(DstReg, RegState::Kill) + .addReg(DstReg, DstFlags | RegState::Implicit); + } else { + unsigned DstReg = MI.getOperand(0).getReg(); + MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui)) + .add(MI.getOperand(0)) + .addUse(DstReg, RegState::Kill); + } if (MO1.isGlobal()) { MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE); diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 15f283841a4..277a3052f1e 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -474,12 +474,32 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { ADRPReg) .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); - ResultReg = createResultReg(&AArch64::GPR64RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), + unsigned LdrOpc; + if (Subtarget->isTargetILP32()) { + ResultReg = createResultReg(&AArch64::GPR32RegClass); + LdrOpc = AArch64::LDRWui; + } else { + ResultReg = createResultReg(&AArch64::GPR64RegClass); + LdrOpc = AArch64::LDRXui; + } + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), ResultReg) - .addReg(ADRPReg) - .addGlobalAddress(GV, 0, - AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); + .addReg(ADRPReg) + .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | + AArch64II::MO_NC | OpFlags); + if (!Subtarget->isTargetILP32()) + return ResultReg; + + // LDRWui produces a 32-bit register, but pointers in-register are 64-bits + // so we must extend the result on ILP32. + unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::SUBREG_TO_REG)) + .addDef(Result64) + .addImm(0) + .addReg(ResultReg, RegState::Kill) + .addImm(AArch64::sub_32); + return Result64; } else { // ADRP + ADDX BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), @@ -504,6 +524,15 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { if (!CEVT.isSimple()) return 0; MVT VT = CEVT.getSimpleVT(); + // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, + // 'null' pointers need to have a somewhat special treatment. 
+ if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) { + (void)CPN; + assert(CPN->getType()->getPointerAddressSpace() == 0 && + "Unexpected address space"); + assert(VT == MVT::i64 && "Expected 64-bit pointers"); + return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); + } if (const auto *CI = dyn_cast<ConstantInt>(C)) return materializeInt(CI, VT); @@ -946,6 +975,9 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { EVT evt = TLI.getValueType(DL, Ty, true); + if (Subtarget->isTargetILP32() && Ty->isPointerTy()) + return false; + // Only handle simple types. if (evt == MVT::Other || !evt.isSimple()) return false; @@ -988,6 +1020,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const { } bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { + if (Subtarget->isTargetILP32()) + return false; + unsigned ScaleFactor = getImplicitScaleFactor(VT); if (!ScaleFactor) return false; @@ -3165,6 +3200,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (IsTailCall) return false; + // FIXME: we could and should support this, but for now correctness at -O0 is + // more important. + if (Subtarget->isTargetILP32()) + return false; + CodeModel::Model CM = TM.getCodeModel(); // Only support the small-addressing and large code models. if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) @@ -3796,6 +3836,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) { if (!FuncInfo.CanLowerReturn) return false; + // FIXME: in principle it could. Mostly just a case of zero extending outgoing + // pointers. + if (Subtarget->isTargetILP32()) + return false; + if (F.isVarArg()) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ef535beaa6c..7d466feeac9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" @@ -1053,6 +1054,14 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode( Known.One &= Known2.One; break; } + case AArch64ISD::LOADgot: + case AArch64ISD::ADDlow: { + if (!Subtarget->isTargetILP32()) + break; + // In ILP32 mode all valid pointers are in the low 4GB of the address-space. + Known.Zero = APInt::getHighBitsSet(64, 32); + break; + } case ISD::INTRINSIC_W_CHAIN: { ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1)); Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue()); @@ -3071,8 +3080,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC, return CC_AArch64_Win64_VarArg; if (!Subtarget->isTargetDarwin()) return CC_AArch64_AAPCS; - return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS; - case CallingConv::Win64: + if (!IsVarArg) + return CC_AArch64_DarwinPCS; + return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg + : CC_AArch64_DarwinPCS_VarArg; + case CallingConv::Win64: return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS; case CallingConv::AArch64_VectorCall: return CC_AArch64_AAPCS; @@ -3095,6 +3107,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Assign locations to all of the incoming arguments. 
SmallVector<CCValAssign, 16> ArgLocs; + DenseMap<unsigned, SDValue> CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs, *DAG.getContext()); @@ -3151,11 +3164,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments( continue; } + SDValue ArgValue; if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); - - SDValue ArgValue; const TargetRegisterClass *RC; if (RegVT == MVT::i32) @@ -3200,14 +3212,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments( case CCValAssign::AExt: case CCValAssign::SExt: case CCValAssign::ZExt: - // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt - // nodes after our lowering. - assert(RegVT == Ins[i].VT && "incorrect register location selected"); + break; + case CCValAssign::AExtUpper: + ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue, + DAG.getConstant(32, DL, RegVT)); + ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT()); break; } - - InVals.push_back(ArgValue); - } else { // VA.isRegLoc() assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem"); unsigned ArgOffset = VA.getLocMemOffset(); @@ -3222,7 +3233,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); - SDValue ArgValue; // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; @@ -3231,6 +3241,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments( switch (VA.getLocInfo()) { default: break; + case CCValAssign::Trunc: case CCValAssign::BCvt: MemVT = VA.getLocVT(); break; @@ -3254,8 +3265,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments( MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), MemVT); - InVals.push_back(ArgValue); } + if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer()) + ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(), + ArgValue, DAG.getValueType(MVT::i32)); + InVals.push_back(ArgValue); } // varargs @@ -3272,8 +3286,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments( // This will point to the next argument passed via stack. unsigned StackOffset = CCInfo.getNextStackOffset(); - // We currently pass all varargs at 8-byte alignment. - StackOffset = ((StackOffset + 7) & ~7); + // We currently pass all varargs at 8-byte alignment, or 4 for ILP32 + StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8); FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true)); if (MFI.hasMustTailInVarArgFunc()) { @@ -3436,6 +3450,7 @@ SDValue AArch64TargetLowering::LowerCallResult( : RetCC_AArch64_AAPCS; // Assign locations to each value returned by this call. SmallVector<CCValAssign, 16> RVLocs; + DenseMap<unsigned, SDValue> CopiedRegs; CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC); @@ -3453,10 +3468,16 @@ SDValue AArch64TargetLowering::LowerCallResult( continue; } - SDValue Val = - DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); - Chain = Val.getValue(1); - InFlag = Val.getValue(2); + // Avoid copying a physreg twice since RegAllocFast is incompetent and only + // allows one use of a physreg per block. 
+ SDValue Val = CopiedRegs.lookup(VA.getLocReg()); + if (!Val) { + Val = + DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag); + Chain = Val.getValue(1); + InFlag = Val.getValue(2); + CopiedRegs[VA.getLocReg()] = Val; + } switch (VA.getLocInfo()) { default: @@ -3466,6 +3487,15 @@ SDValue AArch64TargetLowering::LowerCallResult( case CCValAssign::BCvt: Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val); break; + case CCValAssign::AExtUpper: + Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val, + DAG.getConstant(32, DL, VA.getLocVT())); + LLVM_FALLTHROUGH; + case CCValAssign::AExt: + LLVM_FALLTHROUGH; + case CCValAssign::ZExt: + Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT()); + break; } InVals.push_back(Val); @@ -3779,6 +3809,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, getPointerTy(DAG.getDataLayout())); SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; + SmallSet<unsigned, 8> RegsUsed; SmallVector<SDValue, 8> MemOpChains; auto PtrVT = getPointerTy(DAG.getDataLayout()); @@ -3786,7 +3817,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, const auto &Forwards = FuncInfo->getForwardedMustTailRegParms(); for (const auto &F : Forwards) { SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT); - RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val)); + RegsToPass.emplace_back(F.PReg, Val); } } @@ -3817,8 +3848,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; case CCValAssign::BCvt: - Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); + Arg = DAG.getBitcast(VA.getLocVT(), Arg); + break; + case CCValAssign::Trunc: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); break; case CCValAssign::FPExt: Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); @@ -3838,7 +3878,22 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, "unexpected use of 'returned'"); IsThisReturn = true; } - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + if (RegsUsed.count(VA.getLocReg())) { + // If this register has already been used then we're trying to pack + // parts of an [N x i32] into an X-register. The extension type will + // take care of putting the two halves in the right place but we have to + // combine them. + SDValue &Bits = + std::find_if(RegsToPass.begin(), RegsToPass.end(), + [=](const std::pair<unsigned, SDValue> &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + } else { + RegsToPass.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + } } else { assert(VA.isMemLoc()); @@ -4071,7 +4126,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, // Copy the result values into the output registers. 
SDValue Flag; - SmallVector<SDValue, 4> RetOps(1, Chain); + SmallVector<std::pair<unsigned, SDValue>, 4> RetVals; + SmallSet<unsigned, 4> RegsUsed; for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size(); ++i, ++realRVLocIdx) { CCValAssign &VA = RVLocs[i]; @@ -4093,11 +4149,38 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, case CCValAssign::BCvt: Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg); break; + case CCValAssign::AExt: + case CCValAssign::ZExt: + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + break; + case CCValAssign::AExtUpper: + assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits"); + Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT()); + Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg, + DAG.getConstant(32, DL, VA.getLocVT())); + break; } - Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag); + if (RegsUsed.count(VA.getLocReg())) { + SDValue &Bits = + std::find_if(RetVals.begin(), RetVals.end(), + [=](const std::pair<unsigned, SDValue> &Elt) { + return Elt.first == VA.getLocReg(); + }) + ->second; + Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg); + } else { + RetVals.emplace_back(VA.getLocReg(), Arg); + RegsUsed.insert(VA.getLocReg()); + } + } + + SmallVector<SDValue, 4> RetOps(1, Chain); + for (auto &RetVal : RetVals) { + Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag); Flag = Chain.getValue(1); - RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); + RetOps.push_back( + DAG.getRegister(RetVal.first, RetVal.second.getValueType())); } // Windows AArch64 ABIs require that for returning structs by value we copy @@ -4291,6 +4374,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, SDLoc DL(Op); MVT PtrVT = getPointerTy(DAG.getDataLayout()); + MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout()); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); SDValue TLVPAddr = @@ -4301,12 +4385,15 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op, // to obtain the address of the variable. SDValue Chain = DAG.getEntryNode(); SDValue FuncTLVGet = DAG.getLoad( - MVT::i64, DL, Chain, DescAddr, + PtrMemVT, DL, Chain, DescAddr, MachinePointerInfo::getGOT(DAG.getMachineFunction()), - /* Alignment = */ 8, + /* Alignment = */ PtrMemVT.getSizeInBits() / 8, MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); Chain = FuncTLVGet.getValue(1); + // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer. + FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT); + MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); MFI.setAdjustsStack(true); @@ -5182,6 +5269,7 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op, SDLoc DL(Op); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), getPointerTy(DAG.getDataLayout())); + FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout())); const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), MachinePointerInfo(SV)); @@ -5288,15 +5376,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op, // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single // pointer. SDLoc DL(Op); - unsigned VaListSize = - Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32; + unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8; + unsigned VaListSize = (Subtarget->isTargetDarwin() || + Subtarget->isTargetWindows()) ? 
PtrSize : 32; const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), - Op.getOperand(2), - DAG.getConstant(VaListSize, DL, MVT::i32), - 8, false, false, false, MachinePointerInfo(DestSV), + return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2), + DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize, + false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } @@ -5310,12 +5398,15 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Addr = Op.getOperand(1); unsigned Align = Op.getConstantOperandVal(3); + unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8; auto PtrVT = getPointerTy(DAG.getDataLayout()); - - SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V)); + auto PtrMemVT = getPointerMemTy(DAG.getDataLayout()); + SDValue VAList = + DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V)); Chain = VAList.getValue(1); + VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT); - if (Align > 8) { + if (Align > MinSlotSize) { assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2"); VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(Align - 1, DL, PtrVT)); @@ -5324,14 +5415,14 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { } Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); - uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); + unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy); // Scalar integer and FP values smaller than 64 bits are implicitly extended // up to 64 bits. At the very least, we have to increase the striding of the // vaargs list to match this, and for FP values we need to introduce // FP_ROUND nodes as well. 
if (VT.isInteger() && !VT.isVector()) - ArgSize = 8; + ArgSize = std::max(ArgSize, MinSlotSize); bool NeedFPTrunc = false; if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) { ArgSize = 8; @@ -5341,6 +5432,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // Increment the pointer, VAList, to the next vaarg SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(ArgSize, DL, PtrVT)); + VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT); + // Store the incremented VAList to the legalized pointer SDValue APStore = DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V)); @@ -5370,10 +5463,15 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SDLoc DL(Op); unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); SDValue FrameAddr = - DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT); + DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64); while (Depth--) FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr, MachinePointerInfo()); + + if (Subtarget->isTargetILP32()) + FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr, + DAG.getValueType(VT)); + return FrameAddr; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 77ff6627932..60190b2eb95 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -261,6 +261,14 @@ public: const SelectionDAG &DAG, unsigned Depth = 0) const override; + MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override { + // Returning i64 unconditionally here (i.e. even for ILP32) means that the + // *DAG* representation of pointers will always be 64-bits. They will be + // truncated and extended when transferred to memory, but the 64-bit DAG + // allows us to use AArch64's addressing modes much more easily. 
+ return MVT::getIntegerVT(64); + } + bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 3e1e798e43b..003391363e7 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -1471,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { return false; MachineBasicBlock &MBB = *MI.getParent(); + auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>(); + auto TRI = Subtarget.getRegisterInfo(); DebugLoc DL = MI.getDebugLoc(); if (MI.getOpcode() == AArch64::CATCHRET) { @@ -1506,11 +1508,22 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { if ((OpFlags & AArch64II::MO_GOT) != 0) { BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg) .addGlobalAddress(GV, 0, OpFlags); - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addImm(0) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addImm(0) + .addMemOperand(*MI.memoperands_begin()); + } } else if (TM.getCodeModel() == CodeModel::Large) { + assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?"); BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC) .addImm(0); @@ -1537,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg) .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE); unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC; - BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) - .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, LoFlags) - .addMemOperand(*MI.memoperands_begin()); + if (Subtarget.isTargetILP32()) { + unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32); + BuildMI(MBB, MI, DL, get(AArch64::LDRWui)) + .addDef(Reg32, RegState::Dead) + .addUse(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()) + .addDef(Reg, RegState::Implicit); + } else { + BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) + .addReg(Reg, RegState::Kill) + .addGlobalAddress(GV, 0, LoFlags) + .addMemOperand(*MI.memoperands_begin()); + } } MBB.erase(MI); diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 60dbace03ca..ba61ed726e8 100644 --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -32,7 +32,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( const AArch64TargetLowering &TLI = *STI.getTargetLowering(); EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); + Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index ef360926aa9..757a4699986 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h 
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -411,6 +411,8 @@ public: bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } + bool isTargetILP32() const { return TargetTriple.isArch32Bit(); } + bool useAA() const override { return UseAA; } bool hasVH() const { return HasVH; } @@ -437,6 +439,12 @@ public: bool hasFMI() const { return HasFMI; } bool hasRCPC_IMMO() const { return HasRCPC_IMMO; } + bool addrSinkUsingGEPs() const override { + // Keeping GEPs inbounds is important for exploiting AArch64 + // addressing-modes in ILP32 mode. + return useAA() || isTargetILP32(); + } + bool useSmallAddressing() const { switch (TLInfo.getTargetMachine().getCodeModel()) { case CodeModel::Kernel: diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 71928aafa5d..11a4f991fed 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -157,6 +157,8 @@ extern "C" void LLVMInitializeAArch64Target() { RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget()); RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget()); RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target()); + RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target()); + RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target()); auto PR = PassRegistry::getPassRegistry(); initializeGlobalISel(*PR); initializeAArch64A53Fix835769Pass(*PR); @@ -201,8 +203,11 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) { if (Options.getABIName() == "ilp32") return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"; - if (TT.isOSBinFormatMachO()) + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::aarch64_32) + return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128"; return "e-m:o-i64:64-i128:128-n32:64-S128"; + } if (TT.isOSBinFormatCOFF()) return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128"; if (LittleEndian) @@ -279,7 +284,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, } // Enable GlobalISel at or below EnableGlobalISelAt0. - if (getOptLevel() <= EnableGlobalISelAtO) { + if (getOptLevel() <= EnableGlobalISelAtO && + TT.getArch() != Triple::aarch64_32) { setGlobalISel(true); setGlobalISelAbort(GlobalISelAbortMode::Disable); } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index ecff1ab0a8b..5926a4f8161 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -30,7 +30,7 @@ static cl::opt<AsmWriterVariantTy> AsmWriterVariant( cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"), clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"))); -AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { +AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) { // We prefer NEON instructions to be printed in the short, Apple-specific // form when targeting Darwin. AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant; @@ -39,7 +39,8 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() { PrivateLabelPrefix = "L"; SeparatorString = "%%"; CommentString = ";"; - CodePointerSize = CalleeSaveStackSlotSize = 8; + CalleeSaveStackSlotSize = 8; + CodePointerSize = IsILP32 ? 
4 : 8; AlignmentIsInBytes = false; UsesELFSectionDirectiveForBSS = true; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index 36ae92afc8c..7274ae79f74 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -23,7 +23,7 @@ class Target; class Triple; struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin { - explicit AArch64MCAsmInfoDarwin(); + explicit AArch64MCAsmInfoDarwin(bool IsILP32); const MCExpr * getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp index df12274d947..1d583ec0087 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp @@ -241,7 +241,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI, const Triple &TheTriple) { MCAsmInfo *MAI; if (TheTriple.isOSBinFormatMachO()) - MAI = new AArch64MCAsmInfoDarwin(); + MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32); else if (TheTriple.isWindowsMSVCEnvironment()) MAI = new AArch64MCAsmInfoMicrosoftCOFF(); else if (TheTriple.isOSBinFormatCOFF()) diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index a4b23dc8771..97abd084bf5 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3387,6 +3387,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) { case CCValAssign::SExtUpper: case CCValAssign::ZExtUpper: case CCValAssign::FPExt: + case CCValAssign::Trunc: llvm_unreachable("Unexpected loc info!"); case CCValAssign::Indirect: // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully |