Diffstat (limited to 'llvm/lib')
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 4
 llvm/lib/CodeGen/TargetLoweringBase.cpp | 1
 llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 1
 llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp | 4
 llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp | 1
 llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 3
 llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 2
 llvm/lib/LTO/LTOCodeGenerator.cpp | 3
 llvm/lib/LTO/LTOModule.cpp | 3
 llvm/lib/LTO/ThinLTOCodeGenerator.cpp | 3
 llvm/lib/MC/MCObjectFileInfo.cpp | 7
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 2
 llvm/lib/Target/AArch64/AArch64CallLowering.cpp | 10
 llvm/lib/Target/AArch64/AArch64CallingConvention.cpp | 30
 llvm/lib/Target/AArch64/AArch64CallingConvention.h | 3
 llvm/lib/Target/AArch64/AArch64CallingConvention.td | 34
 llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 22
 llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp | 22
 llvm/lib/Target/AArch64/AArch64FastISel.cpp | 55
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 174
 llvm/lib/Target/AArch64/AArch64ISelLowering.h | 8
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 39
 llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 2
 llvm/lib/Target/AArch64/AArch64Subtarget.h | 8
 llvm/lib/Target/AArch64/AArch64TargetMachine.cpp | 10
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 2
 llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 2
 llvm/lib/Target/X86/X86FastISel.cpp | 1
 29 files changed, 379 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index dc1e8d3d241..86e3020d6d2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -9867,6 +9867,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
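The hunk above peels the ISD::AssertZext wrapper off the lowered argument before the swift-error code pattern-matches on CopyFromReg. A minimal standalone sketch of that unwrap-before-match shape, using hypothetical node types rather than LLVM's SDValue:

    #include <cstdio>

    // Hypothetical stand-ins for SelectionDAG nodes: an AssertZext wrapper
    // carries the value it asserts about as operand 0.
    enum Opcode { CopyFromReg, AssertZext };
    struct Node {
      Opcode Op;
      Node *Operand0; // null for the leaf in this sketch
    };

    int main() {
      Node Copy{CopyFromReg, nullptr};
      Node Wrapped{AssertZext, &Copy};

      Node *Res = &Wrapped;
      // Mirror of the fix: later checks match on CopyFromReg directly, so
      // look through the assertion wrapper first.
      if (Res->Op == AssertZext)
        Res = Res->Operand0;

      printf("matches CopyFromReg: %s\n", Res->Op == CopyFromReg ? "yes" : "no");
    }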
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 5a5f0cd63ac..7c6860eb26c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::BZERO, "__bzero");
break;
case Triple::aarch64:
+ case Triple::aarch64_32:
setLibcallName(RTLIB::BZERO, "bzero");
break;
default:
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 653f30a12a2..68a14453e76 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -155,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
break;
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::aarch64_32:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index d59a7a6e29f..0295db7633d 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -120,7 +120,8 @@ createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
return make_error<StringError>(
std::string("No callback manager available for ") + T.str(),
inconvertibleErrorCode());
- case Triple::aarch64: {
+ case Triple::aarch64:
+ case Triple::aarch64_32: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcAArch64> CCMgrT;
return CCMgrT::Create(ES, ErrorHandlerAddress);
}
@@ -168,6 +169,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
};
case Triple::aarch64:
+ case Triple::aarch64_32:
return [](){
return std::make_unique<
orc::LocalIndirectStubsManager<orc::OrcAArch64>>();
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index 863698eb725..93aabd817d6 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -90,6 +90,7 @@ createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
inconvertibleErrorCode());
case Triple::aarch64:
+ case Triple::aarch64_32:
return LocalLazyCallThroughManager::Create<OrcAArch64>(ES,
ErrorHandlerAddr);
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 4b328624ccd..20dccd84803 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -919,7 +919,8 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE,
uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr,
unsigned AbiVariant) {
- if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be) {
+ if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be ||
+ Arch == Triple::aarch64_32) {
// This stub has to be able to access the full address space,
// since symbol lookup won't necessarily find a handy, in-range,
// PLT stub for functions which could be anywhere.
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index a6a818601c6..9ca76602ea1 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -357,6 +357,8 @@ RuntimeDyldMachO::create(Triple::ArchType Arch,
return std::make_unique<RuntimeDyldMachOARM>(MemMgr, Resolver);
case Triple::aarch64:
return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
+ case Triple::aarch64_32:
+ return std::make_unique<RuntimeDyldMachOAArch64>(MemMgr, Resolver);
case Triple::x86:
return std::make_unique<RuntimeDyldMachOI386>(MemMgr, Resolver);
case Triple::x86_64:
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index bd03184b03c..199c6994992 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -365,7 +365,8 @@ bool LTOCodeGenerator::determineTarget() {
MCpu = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
MCpu = "yonah";
- else if (Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64 ||
+ Triple.getArch() == llvm::Triple::aarch64_32)
MCpu = "cyclone";
}
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index 7ffe7bf84ba..587b332e706 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -220,7 +220,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
CPU = "core2";
else if (Triple.getArch() == llvm::Triple::x86)
CPU = "yonah";
- else if (Triple.getArch() == llvm::Triple::aarch64)
+ else if (Triple.getArch() == llvm::Triple::aarch64 ||
+ Triple.getArch() == llvm::Triple::aarch64_32)
CPU = "cyclone";
}
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 6c8f827d59a..02c0d41d132 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -489,7 +489,8 @@ static void initTMBuilder(TargetMachineBuilder &TMBuilder,
TMBuilder.MCpu = "core2";
else if (TheTriple.getArch() == llvm::Triple::x86)
TMBuilder.MCpu = "yonah";
- else if (TheTriple.getArch() == llvm::Triple::aarch64)
+ else if (TheTriple.getArch() == llvm::Triple::aarch64 ||
+ TheTriple.getArch() == llvm::Triple::aarch64_32)
TMBuilder.MCpu = "cyclone";
}
TMBuilder.TheTriple = std::move(TheTriple);
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 861e5013b6b..70c0409ece7 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -28,7 +28,7 @@ static bool useCompactUnwind(const Triple &T) {
return false;
// aarch64 always has it.
- if (T.getArch() == Triple::aarch64)
+ if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
return true;
// armv7k always has it.
@@ -57,7 +57,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
MachO::S_ATTR_STRIP_STATIC_SYMS | MachO::S_ATTR_LIVE_SUPPORT,
SectionKind::getReadOnly());
- if (T.isOSDarwin() && T.getArch() == Triple::aarch64)
+ if (T.isOSDarwin() &&
+ (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
SupportsCompactUnwindWithoutEHFrame = true;
if (T.isWatchABI())
@@ -193,7 +194,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_X86_64_MODE_DWARF
- else if (T.getArch() == Triple::aarch64)
+ else if (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32)
CompactUnwindDwarfEHFrameOnly = 0x03000000; // UNWIND_ARM64_MODE_DWARF
else if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb)
CompactUnwindDwarfEHFrameOnly = 0x04000000; // UNWIND_ARM_MODE_DWARF
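For context on the UNWIND_*_MODE_DWARF constants gaining an aarch64_32 path above: they occupy the mode field of a 32-bit compact-unwind encoding. A small sketch of classifying an encoding by those bits; the 0x0F000000 mode mask is an assumption inferred from the constants in this hunk, not taken from a header:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint32_t ModeMask = 0x0F000000; // assumed mode-field mask
      uint32_t Encoding = 0x03000000;       // UNWIND_ARM64_MODE_DWARF
      if ((Encoding & ModeMask) == 0x03000000)
        printf("arm64 frame needs the DWARF fallback\n");
      else
        printf("compact encoding is self-contained\n");
    }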
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index e2c9a43b84b..21048998edf 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -1203,4 +1203,6 @@ extern "C" void LLVMInitializeAArch64AsmPrinter() {
RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget());
RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target());
+ RegisterAsmPrinter<AArch64AsmPrinter> W(getTheARM64_32Target());
+ RegisterAsmPrinter<AArch64AsmPrinter> V(getTheAArch64_32Target());
}
diff --git a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index 1ca89ba33a5..c5d4b183b69 100644
--- a/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -379,14 +379,16 @@ bool AArch64CallLowering::lowerFormalArguments(
return false;
if (F.isVarArg()) {
- if (!MF.getSubtarget<AArch64Subtarget>().isTargetDarwin()) {
- // FIXME: we need to reimplement saveVarArgsRegisters from
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ if (!Subtarget.isTargetDarwin()) {
+ // FIXME: we need to reimplement saveVarArgsRegisters from
// AArch64ISelLowering.
return false;
}
- // We currently pass all varargs at 8-byte alignment.
- uint64_t StackOffset = alignTo(Handler.StackUsed, 8);
+ // We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
+ uint64_t StackOffset =
+ alignTo(Handler.StackUsed, Subtarget.isTargetILP32() ? 4 : 8);
auto &MFI = MIRBuilder.getMF().getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
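The switch from a hard-coded 8 to alignTo(StackUsed, 4) matters whenever the named arguments end at an offset that is 4-byte but not 8-byte aligned. A standalone illustration with a local alignTo helper (not LLVM's):

    #include <cstdint>
    #include <cstdio>

    // Round Offset up to the next multiple of Align (a power of two here).
    static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }

    int main() {
      uint64_t StackUsed = 12; // named args end 12 bytes into the area
      printf("first vararg at %llu (LP64) vs %llu (ILP32)\n",
             (unsigned long long)alignTo(StackUsed, 8),  // 16
             (unsigned long long)alignTo(StackUsed, 4)); // 12
    }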
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
index 02538a18761..39f42562d54 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -79,10 +79,14 @@ static bool CC_AArch64_Custom_Stack_Block(
static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
+ State.getMachineFunction().getSubtarget());
+ bool IsDarwinILP32 = Subtarget.isTargetILP32() && Subtarget.isTargetMachO();
+
// Try to allocate a contiguous block of registers, each of the correct
// size to hold one member.
ArrayRef<MCPhysReg> RegList;
- if (LocVT.SimpleTy == MVT::i64)
+ if (LocVT.SimpleTy == MVT::i64 || (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32))
RegList = XRegList;
else if (LocVT.SimpleTy == MVT::f16)
RegList = HRegList;
@@ -107,8 +111,12 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
if (!ArgFlags.isInConsecutiveRegsLast())
return true;
- unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
- if (RegResult) {
+ // [N x i32] arguments get packed into x-registers on Darwin's arm64_32
+ // because that's how the armv7k Clang front-end emits small structs.
+ unsigned EltsPerReg = (IsDarwinILP32 && LocVT.SimpleTy == MVT::i32) ? 2 : 1;
+ unsigned RegResult = State.AllocateRegBlock(
+ RegList, alignTo(PendingMembers.size(), EltsPerReg) / EltsPerReg);
+ if (RegResult && EltsPerReg == 1) {
for (auto &It : PendingMembers) {
It.convertToReg(RegResult);
State.addLoc(It);
@@ -116,14 +124,26 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
}
PendingMembers.clear();
return true;
+ } else if (RegResult) {
+ assert(EltsPerReg == 2 && "unexpected ABI");
+ bool UseHigh = false;
+ CCValAssign::LocInfo Info;
+ for (auto &It : PendingMembers) {
+ Info = UseHigh ? CCValAssign::AExtUpper : CCValAssign::ZExt;
+ State.addLoc(CCValAssign::getReg(It.getValNo(), MVT::i32, RegResult,
+ MVT::i64, Info));
+ UseHigh = !UseHigh;
+ if (!UseHigh)
+ ++RegResult;
+ }
+ PendingMembers.clear();
+ return true;
}
// Mark all regs in the class as unavailable
for (auto Reg : RegList)
State.AllocateReg(Reg);
- const AArch64Subtarget &Subtarget = static_cast<const AArch64Subtarget &>(
- State.getMachineFunction().getSubtarget());
unsigned SlotAlign = Subtarget.isTargetDarwin() ? 1 : 8;
return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
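To see what the AExtUpper/ZExt assignments above produce, here is a plain-C++ model of packing an [N x i32] block into 64-bit registers, two members per register, even members in the low half and odd members in the high half:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    int main() {
      const uint32_t Members[] = {0x11111111, 0x22222222, 0x33333333};
      const unsigned NumMembers = 3, EltsPerReg = 2;
      // alignTo(3, 2) / 2 == 2 registers for three 32-bit members.
      std::vector<uint64_t> Regs((NumMembers + EltsPerReg - 1) / EltsPerReg, 0);
      for (unsigned I = 0; I != NumMembers; ++I) {
        uint64_t Val = Members[I];  // CCValAssign::ZExt: low half
        if (I % 2)
          Val <<= 32;               // CCValAssign::AExtUpper: high half
        Regs[I / EltsPerReg] |= Val;
      }
      for (unsigned R = 0; R != Regs.size(); ++R)
        printf("x%u = 0x%016llx\n", R, (unsigned long long)Regs[R]);
    }

Running this prints x0 = 0x2222222211111111 and x1 = 0x0000000033333333, matching the armv7k-style layout the comment in the hunk describes.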
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 13cc0c583fd..5a55d090d7c 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -25,6 +25,9 @@ bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_AArch64_DarwinPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
+bool CC_AArch64_DarwinPCS_ILP32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_Win64_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 95d3e4d1e2e..bccbbd4591e 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,10 @@ class CCIfAlign<string Align, CCAction A> :
class CCIfBigEndian<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;
+class CCIfILP32<CCAction A> :
+ CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
+
+
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//
@@ -123,6 +127,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
+ CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,
CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,
// Big endian vectors must be passed as if they were 1-element vectors so that
@@ -221,6 +226,12 @@ def CC_AArch64_DarwinPCS : CallingConv<[
CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+
+ // Re-demote pointers to 32-bits so we don't end up storing 64-bit
+ // values and clobbering neighbouring stack locations. Not very pretty.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfPtr<CCIfILP32<CCAssignToStack<4, 4>>>,
+
CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
CCAssignToStack<8, 8>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
@@ -248,6 +259,29 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
CCAssignToStack<16, 16>>
]>;
+// In the ILP32 world, the minimum stack slot size is 4 bytes. Otherwise the
+// same as the normal Darwin VarArgs handling.
+let Entry = 1 in
+def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+ CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
+ CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
+
+ // Handle all scalar types as either i32 or f32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+ CCIfType<[f16], CCPromoteToType<f32>>,
+
+ // Everything is on the stack.
+ // i128 is split to two i64s, and its stack alignment is 16 bytes.
+ CCIfPtr<CCIfILP32<CCTruncToType<i32>>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
+ CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
+ CCAssignToStack<8, 8>>,
+ CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
+ CCAssignToStack<16, 16>>
+]>;
+
+
// The WebKit_JS calling convention only passes the first argument (the callee)
// in register and the remaining arguments on stack. We allow 32bit stack slots,
// so that WebKit can write partial values in the stack and define the other
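The CCTruncToType<i32> step in CC_AArch64_DarwinPCS above exists because a pointer that stayed i64 would be stored with an 8-byte store into a 4-byte slot, overwriting whichever argument lives next. A byte-level sketch of the hazard and the fix:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      unsigned char Stack[8] = {0};
      uint32_t Neighbour = 0x55667788;      // argument in the second 4-byte slot
      memcpy(Stack + 4, &Neighbour, 4);

      uint64_t Ptr = 0x00000000AABBCCDDULL; // 32-bit pointer in a 64-bit reg
      uint32_t Demoted = (uint32_t)Ptr;     // CCTruncToType<i32>
      memcpy(Stack, &Demoted, 4);           // 4-byte store: slot 2 intact
      // An 8-byte "memcpy(Stack, &Ptr, 8)" would have zeroed the neighbour.

      memcpy(&Neighbour, Stack + 4, 4);
      printf("neighbour survived: %s\n", Neighbour == 0x55667788 ? "yes" : "no");
    }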
diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 9f324b43320..35e6fef2436 100644
--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -103,6 +103,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -181,6 +182,7 @@ static bool canDefBePartOfLOH(const MachineInstr &MI) {
case AArch64::ADDXri:
return canAddBePartOfLOH(MI);
case AArch64::LDRXui:
+ case AArch64::LDRWui:
// Check immediate to see if the immediate is an address.
switch (MI.getOperand(2).getType()) {
default:
@@ -312,7 +314,8 @@ static void handleUse(const MachineInstr &MI, const MachineOperand &MO,
Info.Type = MCLOH_AdrpAdd;
Info.IsCandidate = true;
Info.MI0 = &MI;
- } else if (MI.getOpcode() == AArch64::LDRXui &&
+ } else if ((MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui) &&
MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) {
Info.Type = MCLOH_AdrpLdrGot;
Info.IsCandidate = true;
@@ -357,7 +360,9 @@ static bool handleMiddleInst(const MachineInstr &MI, LOHInfo &DefInfo,
return true;
}
} else {
- assert(MI.getOpcode() == AArch64::LDRXui && "Expect LDRXui");
+ assert((MI.getOpcode() == AArch64::LDRXui ||
+ MI.getOpcode() == AArch64::LDRWui) &&
+ "Expect LDRXui or LDRWui");
assert((MI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) &&
"Expected GOT relocation");
if (OpInfo.Type == MCLOH_AdrpAddStr && OpInfo.MI1 == nullptr) {
@@ -474,13 +479,23 @@ static void handleNormalInst(const MachineInstr &MI, LOHInfo *LOHInfos) {
handleClobber(LOHInfos[Idx]);
}
// Handle uses.
+
+ SmallSet<int, 4> UsesSeen;
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
int Idx = mapRegToGPRIndex(MO.getReg());
if (Idx < 0)
continue;
- handleUse(MI, MO, LOHInfos[Idx]);
+
+ // Multiple uses of the same register within a single instruction don't
+ // count as MultiUser or block optimization. This is especially important on
+ // arm64_32, where any memory operation is likely to be an explicit use of
+ // xN and an implicit use of wN (the base address register).
+ if (!UsesSeen.count(Idx)) {
+ handleUse(MI, MO, LOHInfos[Idx]);
+ UsesSeen.insert(Idx);
+ }
}
}
@@ -512,6 +527,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) {
switch (Opcode) {
case AArch64::ADDXri:
case AArch64::LDRXui:
+ case AArch64::LDRWui:
if (canDefBePartOfLOH(MI)) {
const MachineOperand &Def = MI.getOperand(0);
const MachineOperand &Op = MI.getOperand(1);
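The UsesSeen set above collapses the duplicate register uses a single arm64_32 memory operation produces (xN explicitly plus wN implicitly, the same GPR index after mapRegToGPRIndex). A minimal model of that first-sighting filter, with std::set standing in for SmallSet:

    #include <cstdio>
    #include <set>

    int main() {
      // GPR indices used by one instruction: x8 explicitly, w8 implicitly,
      // plus an unrelated x1.
      const int UseIdx[] = {8, 8, 1};
      std::set<int> UsesSeen;
      for (int Idx : UseIdx)
        if (UsesSeen.insert(Idx).second)         // true only on first sighting
          printf("handleUse for GPR %d\n", Idx); // fires once per register
    }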
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 2d917dcbffb..4c845ed6f82 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -495,12 +495,26 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
}
} else {
// Small codemodel expand into ADRP + LDR.
+ MachineFunction &MF = *MI.getParent()->getParent();
+ DebugLoc DL = MI.getDebugLoc();
MachineInstrBuilder MIB1 =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRXui))
- .add(MI.getOperand(0))
- .addReg(DstReg);
+
+ MachineInstrBuilder MIB2;
+ if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
+ unsigned DstFlags = MI.getOperand(0).getTargetFlags();
+ MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(DstReg, DstFlags | RegState::Implicit);
+ } else {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
+ .add(MI.getOperand(0))
+ .addUse(DstReg, RegState::Kill);
+ }
if (MO1.isGlobal()) {
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
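The ILP32 branch above loads through the W sub-register because the GOT entry is only 4 bytes wide, relying on the AArch64 rule that a 32-bit register write zeroes the upper half of the X register. The same effect, modeled in plain C++:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t X8 = 0xDEADBEEFDEADBEEFULL; // stale bits in the full register
      uint32_t GotSlot = 0x00104000;       // 4-byte GOT entry contents
      X8 = GotSlot; // "ldr w8, [...]": writing w8 zero-fills the top 32 bits
      printf("x8 = 0x%016llx\n", (unsigned long long)X8);
    }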
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 15f283841a4..277a3052f1e 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -474,12 +474,32 @@ unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
ADRPReg)
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
- ResultReg = createResultReg(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
+ unsigned LdrOpc;
+ if (Subtarget->isTargetILP32()) {
+ ResultReg = createResultReg(&AArch64::GPR32RegClass);
+ LdrOpc = AArch64::LDRWui;
+ } else {
+ ResultReg = createResultReg(&AArch64::GPR64RegClass);
+ LdrOpc = AArch64::LDRXui;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
ResultReg)
- .addReg(ADRPReg)
- .addGlobalAddress(GV, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
+ .addReg(ADRPReg)
+ .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
+ AArch64II::MO_NC | OpFlags);
+ if (!Subtarget->isTargetILP32())
+ return ResultReg;
+
+ // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
+ // so we must extend the result on ILP32.
+ unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::SUBREG_TO_REG))
+ .addDef(Result64)
+ .addImm(0)
+ .addReg(ResultReg, RegState::Kill)
+ .addImm(AArch64::sub_32);
+ return Result64;
} else {
// ADRP + ADDX
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
@@ -504,6 +524,15 @@ unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
if (!CEVT.isSimple())
return 0;
MVT VT = CEVT.getSimpleVT();
+ // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
+ // 'null' pointers need to have a somewhat special treatment.
+ if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) {
+ (void)CPN;
+ assert(CPN->getType()->getPointerAddressSpace() == 0 &&
+ "Unexpected address space");
+ assert(VT == MVT::i64 && "Expected 64-bit pointers");
+ return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
+ }
if (const auto *CI = dyn_cast<ConstantInt>(C))
return materializeInt(CI, VT);
@@ -946,6 +975,9 @@ bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
EVT evt = TLI.getValueType(DL, Ty, true);
+ if (Subtarget->isTargetILP32() && Ty->isPointerTy())
+ return false;
+
// Only handle simple types.
if (evt == MVT::Other || !evt.isSimple())
return false;
@@ -988,6 +1020,9 @@ bool AArch64FastISel::isValueAvailable(const Value *V) const {
}
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
+ if (Subtarget->isTargetILP32())
+ return false;
+
unsigned ScaleFactor = getImplicitScaleFactor(VT);
if (!ScaleFactor)
return false;
@@ -3165,6 +3200,11 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsTailCall)
return false;
+ // FIXME: we could and should support this, but for now correctness at -O0 is
+ // more important.
+ if (Subtarget->isTargetILP32())
+ return false;
+
CodeModel::Model CM = TM.getCodeModel();
// Only support the small-addressing and large code models.
if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
@@ -3796,6 +3836,11 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ // FIXME: in principle it could. Mostly just a case of zero extending outgoing
+ // pointers.
+ if (Subtarget->isTargetILP32())
+ return false;
+
if (F.isVarArg())
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index ef535beaa6c..7d466feeac9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
@@ -1053,6 +1054,14 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known.One &= Known2.One;
break;
}
+ case AArch64ISD::LOADgot:
+ case AArch64ISD::ADDlow: {
+ if (!Subtarget->isTargetILP32())
+ break;
+ // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
+ Known.Zero = APInt::getHighBitsSet(64, 32);
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
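The known-bits fact recorded above can be checked with an ordinary mask: every valid ILP32 pointer clears the top 32 bits of its 64-bit DAG value. A mask-based sketch using plain integers rather than llvm::APInt:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // APInt::getHighBitsSet(64, 32) as a plain mask.
      const uint64_t KnownZero = 0xFFFFFFFF00000000ULL;
      uint64_t LoadedPtr = 0x00000000DEADBEEFULL; // a LOADgot/ADDlow result
      printf("high 32 bits known zero: %s\n",
             (LoadedPtr & KnownZero) == 0 ? "yes" : "no");
    }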
@@ -3071,8 +3080,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return CC_AArch64_Win64_VarArg;
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
- return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
- case CallingConv::Win64:
+ if (!IsVarArg)
+ return CC_AArch64_DarwinPCS;
+ return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
+ : CC_AArch64_DarwinPCS_VarArg;
+ case CallingConv::Win64:
return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
case CallingConv::AArch64_VectorCall:
return CC_AArch64_AAPCS;
@@ -3095,6 +3107,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
+ DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
@@ -3151,11 +3164,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
continue;
}
+ SDValue ArgValue;
if (VA.isRegLoc()) {
// Arguments stored in registers.
EVT RegVT = VA.getLocVT();
-
- SDValue ArgValue;
const TargetRegisterClass *RC;
if (RegVT == MVT::i32)
@@ -3200,14 +3212,13 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
case CCValAssign::AExt:
case CCValAssign::SExt:
case CCValAssign::ZExt:
- // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
- // nodes after our lowering.
- assert(RegVT == Ins[i].VT && "incorrect register location selected");
+ break;
+ case CCValAssign::AExtUpper:
+ ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
+ DAG.getConstant(32, DL, RegVT));
+ ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
break;
}
-
- InVals.push_back(ArgValue);
-
} else { // VA.isRegLoc()
assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
unsigned ArgOffset = VA.getLocMemOffset();
@@ -3222,7 +3233,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
- SDValue ArgValue;
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
@@ -3231,6 +3241,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
switch (VA.getLocInfo()) {
default:
break;
+ case CCValAssign::Trunc:
case CCValAssign::BCvt:
MemVT = VA.getLocVT();
break;
@@ -3254,8 +3265,11 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
MemVT);
- InVals.push_back(ArgValue);
}
+ if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
+ ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
+ ArgValue, DAG.getValueType(MVT::i32));
+ InVals.push_back(ArgValue);
}
// varargs
@@ -3272,8 +3286,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// This will point to the next argument passed via stack.
unsigned StackOffset = CCInfo.getNextStackOffset();
- // We currently pass all varargs at 8-byte alignment.
- StackOffset = ((StackOffset + 7) & ~7);
+ // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
+ StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
@@ -3436,6 +3450,7 @@ SDValue AArch64TargetLowering::LowerCallResult(
: RetCC_AArch64_AAPCS;
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
+ DenseMap<unsigned, SDValue> CopiedRegs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
CCInfo.AnalyzeCallResult(Ins, RetCC);
@@ -3453,10 +3468,16 @@ SDValue AArch64TargetLowering::LowerCallResult(
continue;
}
- SDValue Val =
- DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
- Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ // Avoid copying a physreg twice since RegAllocFast is incompetent and only
+ // allows one use of a physreg per block.
+ SDValue Val = CopiedRegs.lookup(VA.getLocReg());
+ if (!Val) {
+ Val =
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
+ Chain = Val.getValue(1);
+ InFlag = Val.getValue(2);
+ CopiedRegs[VA.getLocReg()] = Val;
+ }
switch (VA.getLocInfo()) {
default:
@@ -3466,6 +3487,15 @@ SDValue AArch64TargetLowering::LowerCallResult(
case CCValAssign::BCvt:
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
break;
+ case CCValAssign::AExtUpper:
+ Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ LLVM_FALLTHROUGH;
+ case CCValAssign::AExt:
+ LLVM_FALLTHROUGH;
+ case CCValAssign::ZExt:
+ Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
+ break;
}
InVals.push_back(Val);
@@ -3779,6 +3809,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
getPointerTy(DAG.getDataLayout()));
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallSet<unsigned, 8> RegsUsed;
SmallVector<SDValue, 8> MemOpChains;
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -3786,7 +3817,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
for (const auto &F : Forwards) {
SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
- RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
+ RegsToPass.emplace_back(F.PReg, Val);
}
}
@@ -3817,8 +3848,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::AExtUpper:
+ assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ break;
case CCValAssign::BCvt:
- Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ Arg = DAG.getBitcast(VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::Trunc:
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
break;
case CCValAssign::FPExt:
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
@@ -3838,7 +3878,22 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
"unexpected use of 'returned'");
IsThisReturn = true;
}
- RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ if (RegsUsed.count(VA.getLocReg())) {
+ // If this register has already been used then we're trying to pack
+ // parts of an [N x i32] into an X-register. The extension type will
+ // take care of putting the two halves in the right place but we have to
+ // combine them.
+ SDValue &Bits =
+ std::find_if(RegsToPass.begin(), RegsToPass.end(),
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
+ ->second;
+ Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
+ } else {
+ RegsToPass.emplace_back(VA.getLocReg(), Arg);
+ RegsUsed.insert(VA.getLocReg());
+ }
} else {
assert(VA.isMemLoc());
@@ -4071,7 +4126,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Copy the result values into the output registers.
SDValue Flag;
- SmallVector<SDValue, 4> RetOps(1, Chain);
+ SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
+ SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
++i, ++realRVLocIdx) {
CCValAssign &VA = RVLocs[i];
@@ -4093,11 +4149,38 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
case CCValAssign::BCvt:
Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
break;
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt:
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
+ break;
+ case CCValAssign::AExtUpper:
+ assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
+ Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
+ Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
+ DAG.getConstant(32, DL, VA.getLocVT()));
+ break;
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ if (RegsUsed.count(VA.getLocReg())) {
+ SDValue &Bits =
+ std::find_if(RetVals.begin(), RetVals.end(),
+ [=](const std::pair<unsigned, SDValue> &Elt) {
+ return Elt.first == VA.getLocReg();
+ })
+ ->second;
+ Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
+ } else {
+ RetVals.emplace_back(VA.getLocReg(), Arg);
+ RegsUsed.insert(VA.getLocReg());
+ }
+ }
+
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ for (auto &RetVal : RetVals) {
+ Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ RetOps.push_back(
+ DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
// Windows AArch64 ABIs require that for returning structs by value we copy
@@ -4291,6 +4374,7 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
SDLoc DL(Op);
MVT PtrVT = getPointerTy(DAG.getDataLayout());
+ MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDValue TLVPAddr =
@@ -4301,12 +4385,15 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
SDValue FuncTLVGet = DAG.getLoad(
- MVT::i64, DL, Chain, DescAddr,
+ PtrMemVT, DL, Chain, DescAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 8,
+ /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
+ // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
+ FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
+
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setAdjustsStack(true);
@@ -5182,6 +5269,7 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SDLoc DL(Op);
SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
getPointerTy(DAG.getDataLayout()));
+ FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
@@ -5288,15 +5376,15 @@ SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
// AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
// pointer.
SDLoc DL(Op);
- unsigned VaListSize =
- Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
+ unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
+ unsigned VaListSize = (Subtarget->isTargetDarwin() ||
+ Subtarget->isTargetWindows()) ? PtrSize : 32;
const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
- Op.getOperand(2),
- DAG.getConstant(VaListSize, DL, MVT::i32),
- 8, false, false, false, MachinePointerInfo(DestSV),
+ return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize,
+ false, false, false, MachinePointerInfo(DestSV),
MachinePointerInfo(SrcSV));
}
@@ -5310,12 +5398,15 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Addr = Op.getOperand(1);
unsigned Align = Op.getConstantOperandVal(3);
+ unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
auto PtrVT = getPointerTy(DAG.getDataLayout());
-
- SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
+ auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
+ SDValue VAList =
+ DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
Chain = VAList.getValue(1);
+ VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
- if (Align > 8) {
+ if (Align > MinSlotSize) {
assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(Align - 1, DL, PtrVT));
@@ -5324,14 +5415,14 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
}
Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
- uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
+ unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
// Scalar integer and FP values smaller than 64 bits are implicitly extended
// up to 64 bits. At the very least, we have to increase the striding of the
// vaargs list to match this, and for FP values we need to introduce
// FP_ROUND nodes as well.
if (VT.isInteger() && !VT.isVector())
- ArgSize = 8;
+ ArgSize = std::max(ArgSize, MinSlotSize);
bool NeedFPTrunc = false;
if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
ArgSize = 8;
@@ -5341,6 +5432,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// Increment the pointer, VAList, to the next vaarg
SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
DAG.getConstant(ArgSize, DL, PtrVT));
+ VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
+
// Store the incremented VAList to the legalized pointer
SDValue APStore =
DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
@@ -5370,10 +5463,15 @@ SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
SDValue FrameAddr =
- DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
+ DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
while (Depth--)
FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
MachinePointerInfo());
+
+ if (Subtarget->isTargetILP32())
+ FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
+ DAG.getValueType(VT));
+
return FrameAddr;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 77ff6627932..60190b2eb95 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -261,6 +261,14 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
+ // Returning i64 unconditionally here (i.e. even for ILP32) means that the
+ // *DAG* representation of pointers will always be 64-bits. They will be
+ // truncated and extended when transferred to memory, but the 64-bit DAG
+ // allows us to use AArch64's addressing modes much more easily.
+ return MVT::getIntegerVT(64);
+ }
+
bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const override;
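The getPointerTy override above fixes the in-register ("DAG") pointer type at i64 even for ILP32, while the in-memory type stays 32 bits; stores truncate and loads zero-extend. A round-trip sketch of that split:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      unsigned char Mem[4];
      uint64_t Reg = 0x00000000CAFEBABEULL; // pointer in its 64-bit DAG form
      uint32_t Trunc = (uint32_t)Reg;       // truncate for the 4-byte slot
      memcpy(Mem, &Trunc, 4);               // store: 32-bit memory type
      uint32_t Loaded;
      memcpy(&Loaded, Mem, 4);              // load back
      uint64_t Extended = Loaded;           // zero-extend to the DAG type
      printf("round trip ok: %s\n", Extended == Reg ? "yes" : "no");
    }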
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3e1e798e43b..003391363e7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1471,6 +1471,8 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return false;
MachineBasicBlock &MBB = *MI.getParent();
+ auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
+ auto TRI = Subtarget.getRegisterInfo();
DebugLoc DL = MI.getDebugLoc();
if (MI.getOpcode() == AArch64::CATCHRET) {
@@ -1506,11 +1508,22 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
if ((OpFlags & AArch64II::MO_GOT) != 0) {
BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
.addGlobalAddress(GV, 0, OpFlags);
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addImm(0)
- .addMemOperand(*MI.memoperands_begin());
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addImm(0)
+ .addMemOperand(*MI.memoperands_begin());
+ }
} else if (TM.getCodeModel() == CodeModel::Large) {
+ assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
.addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
.addImm(0);
@@ -1537,10 +1550,20 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
.addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
- BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
- .addReg(Reg, RegState::Kill)
- .addGlobalAddress(GV, 0, LoFlags)
- .addMemOperand(*MI.memoperands_begin());
+ if (Subtarget.isTargetILP32()) {
+ unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
+ BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
+ .addDef(Reg32, RegState::Dead)
+ .addUse(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin())
+ .addDef(Reg, RegState::Implicit);
+ } else {
+ BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
+ .addReg(Reg, RegState::Kill)
+ .addGlobalAddress(GV, 0, LoFlags)
+ .addMemOperand(*MI.memoperands_begin());
+ }
}
MBB.erase(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 60dbace03ca..ba61ed726e8 100644
--- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -32,7 +32,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset(
const AArch64TargetLowering &TLI = *STI.getTargetLowering();
EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
- Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Dst;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index ef360926aa9..757a4699986 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -411,6 +411,8 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
+ bool isTargetILP32() const { return TargetTriple.isArch32Bit(); }
+
bool useAA() const override { return UseAA; }
bool hasVH() const { return HasVH; }
@@ -437,6 +439,12 @@ public:
bool hasFMI() const { return HasFMI; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
+ bool addrSinkUsingGEPs() const override {
+ // Keeping GEPs inbounds is important for exploiting AArch64
+ // addressing-modes in ILP32 mode.
+ return useAA() || isTargetILP32();
+ }
+
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 71928aafa5d..11a4f991fed 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -157,6 +157,8 @@ extern "C" void LLVMInitializeAArch64Target() {
RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
+ RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
+ RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
auto PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
initializeAArch64A53Fix835769Pass(*PR);
@@ -201,8 +203,11 @@ static std::string computeDataLayout(const Triple &TT,
bool LittleEndian) {
if (Options.getABIName() == "ilp32")
return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
- if (TT.isOSBinFormatMachO())
+ if (TT.isOSBinFormatMachO()) {
+ if (TT.getArch() == Triple::aarch64_32)
+ return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
return "e-m:o-i64:64-i128:128-n32:64-S128";
+ }
if (TT.isOSBinFormatCOFF())
return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
@@ -279,7 +284,8 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
}
// Enable GlobalISel at or below EnableGlobalISelAt0.
- if (getOptLevel() <= EnableGlobalISelAtO) {
+ if (getOptLevel() <= EnableGlobalISelAtO &&
+ TT.getArch() != Triple::aarch64_32) {
setGlobalISel(true);
setGlobalISelAbort(GlobalISelAbortMode::Disable);
}
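The only difference in the arm64_32 data-layout string above is the explicit "p:32:32" pointer spec. A quick string-level check, illustrative only (llvm::DataLayout is the real parser):

    #include <cstdio>
    #include <string>

    int main() {
      const std::string DL = "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
      size_t Pos = DL.find("-p:");
      unsigned Bits = Pos == std::string::npos
                          ? 64 // no explicit spec: default pointer size
                          : static_cast<unsigned>(std::stoul(DL.substr(Pos + 3)));
      printf("pointer width: %u bits\n", Bits); // 32 for arm64_32
    }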
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index ecff1ab0a8b..5926a4f8161 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -30,7 +30,7 @@ static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly")));
-AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
+AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin(bool IsILP32) {
// We prefer NEON instructions to be printed in the short, Apple-specific
// form when targeting Darwin.
AssemblerDialect = AsmWriterVariant == Default ? Apple : AsmWriterVariant;
@@ -39,7 +39,8 @@ AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
PrivateLabelPrefix = "L";
SeparatorString = "%%";
CommentString = ";";
- CodePointerSize = CalleeSaveStackSlotSize = 8;
+ CalleeSaveStackSlotSize = 8;
+ CodePointerSize = IsILP32 ? 4 : 8;
AlignmentIsInBytes = false;
UsesELFSectionDirectiveForBSS = true;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
index 36ae92afc8c..7274ae79f74 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h
@@ -23,7 +23,7 @@ class Target;
class Triple;
struct AArch64MCAsmInfoDarwin : public MCAsmInfoDarwin {
- explicit AArch64MCAsmInfoDarwin();
+ explicit AArch64MCAsmInfoDarwin(bool IsILP32);
const MCExpr *
getExprForPersonalitySymbol(const MCSymbol *Sym, unsigned Encoding,
MCStreamer &Streamer) const override;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index df12274d947..1d583ec0087 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -241,7 +241,7 @@ static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TheTriple) {
MCAsmInfo *MAI;
if (TheTriple.isOSBinFormatMachO())
- MAI = new AArch64MCAsmInfoDarwin();
+ MAI = new AArch64MCAsmInfoDarwin(TheTriple.getArch() == Triple::aarch64_32);
else if (TheTriple.isWindowsMSVCEnvironment())
MAI = new AArch64MCAsmInfoMicrosoftCOFF();
else if (TheTriple.isOSBinFormatCOFF())
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index a4b23dc8771..97abd084bf5 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3387,6 +3387,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
case CCValAssign::SExtUpper:
case CCValAssign::ZExtUpper:
case CCValAssign::FPExt:
+ case CCValAssign::Trunc:
llvm_unreachable("Unexpected loc info!");
case CCValAssign::Indirect:
// FIXME: Indirect doesn't need extending, but fast-isel doesn't fully