author    Tim Northover <tnorthover@apple.com>  2018-10-24 20:19:09 +0000
committer Tim Northover <tnorthover@apple.com>  2018-10-24 20:19:09 +0000
commit    1c353419ab51f63235389b821c1e6ed02c3ccfb8 (patch)
tree      20efe54b2cde1c403729f37ad13e137b7ff32c66 /llvm/lib
parent    769d4cebadf2ea8442093f156f5497dbe82a3737 (diff)
AArch64: add a pass to compress jump-table entries when possible.
llvm-svn: 345188
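
As background for the patch below: an uncompressed jump table keeps 4-byte
".word LBB - LJTI" entries, while a compressed table stores the distance from
the lowest-addressed destination, divided by 4 (every AArch64 instruction is
4-byte aligned), in a single byte or halfword. A minimal C++ sketch of the
encoding, with hypothetical names and not taken from the commit:

    #include <cassert>
    #include <cstdint>

    // Value stored in one jump-table entry. For 4-byte entries, BaseAddr is
    // the table label LJTI; for 1- and 2-byte entries, it is the
    // lowest-addressed destination the pass records as the PC-relative anchor.
    int64_t encodeEntry(int64_t DestAddr, int64_t BaseAddr, unsigned Size) {
      if (Size == 4)
        return DestAddr - BaseAddr;      // signed; read back with ldrsw
      assert((DestAddr - BaseAddr) % 4 == 0 && "code is 4-byte aligned");
      return (DestAddr - BaseAddr) >> 2; // unsigned count of instructions
    }
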
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.h                      |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td                     |  13
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp          | 132
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp  | 162
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp        |  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h          |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp           |   8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td            |  24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h   |  15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h             |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp       |   8
-rw-r--r--  llvm/lib/Target/AArch64/CMakeLists.txt                 |   1
12 files changed, 384 insertions, 4 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 6472dcd5157..2f0d0bf346d 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -32,6 +32,7 @@ class MachineFunctionPass;
FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -62,6 +63,7 @@ void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
void initializeAArch64BranchTargetsPass(PassRegistry&);
void initializeAArch64CollectLOHPass(PassRegistry&);
void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
void initializeAArch64ConditionalComparesPass(PassRegistry&);
void initializeAArch64ConditionOptimizerPass(PassRegistry&);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 368898fd1e6..de78ca5b257 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -180,6 +180,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureForce32BitJumpTables
+ : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+ "Force jump table entries to be 32-bits wide except at MinSize">;
+
def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
"Enable support for RCPC extension">;
@@ -411,7 +415,8 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
FeatureUseRSqrt,
- FeatureZCZeroingFP]>;
+ FeatureZCZeroingFP,
+ FeatureForce32BitJumpTables]>;
def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
"Samsung Exynos-M2 processors",
@@ -425,7 +430,8 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
FeaturePerfMon,
FeaturePostRAScheduler,
FeatureSlowMisaligned128Store,
- FeatureZCZeroingFP]>;
+ FeatureZCZeroingFP,
+ FeatureForce32BitJumpTables]>;
def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
@@ -442,7 +448,8 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeaturePerfMon,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
- FeatureZCZeroingFP]>;
+ FeatureZCZeroingFP,
+ FeatureForce32BitJumpTables]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 23b6a65555a..b1375c969d9 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -31,6 +31,8 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +79,12 @@ public:
return MCInstLowering.lowerOperand(MO, MCOp);
}
+ void EmitJumpTableInfo() override;
+ void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB, unsigned JTI);
+
+ void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -433,6 +441,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
printOperand(MI, NOps - 2, OS);
}
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ if (!MJTI) return;
+
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ if (JT.empty()) return;
+
+ const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+ MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+ OutStreamer->SwitchSection(ReadOnlySec);
+
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+ // If this jump table was deleted, ignore it.
+ if (JTBBs.empty()) continue;
+
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+ EmitAlignment(Log2_32(Size));
+ OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+ for (auto *JTBB : JTBBs)
+ emitJumpTableEntry(MJTI, JTBB, JTI);
+ }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+ const MachineBasicBlock *MBB,
+ unsigned JTI) {
+ const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+ if (Size == 4) {
+ // .word LBB - LJTI
+ const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+ const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ } else {
+ // .byte (LBB - LBB) >> 2 (or .hword)
+ const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+ const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+ Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+ Value = MCBinaryExpr::createLShr(
+ Value, MCConstantExpr::create(2, OutContext), OutContext);
+ }
+
+ OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further compressed
+/// by counting in instructions rather than bytes (i.e. divided by 4). So, to
+/// materialize the correct destination we need:
+///
+/// adr xDest, .LBB0_0
+/// ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+/// add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+ const llvm::MachineInstr &MI) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned ScratchReg = MI.getOperand(1).getReg();
+ unsigned ScratchRegW =
+ STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+ unsigned TableReg = MI.getOperand(2).getReg();
+ unsigned EntryReg = MI.getOperand(3).getReg();
+ int JTIdx = MI.getOperand(4).getIndex();
+ bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+ // This has to be first because the compression pass bases its reachability
+ // calculations on the start of the JumpTableDest instruction.
+ auto Label =
+ MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+ .addReg(DestReg)
+ .addExpr(MCSymbolRefExpr::create(
+ Label, MF->getContext())));
+
+ // Load the number of instruction-steps to offset from the label.
+ unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+ EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+ .addReg(ScratchRegW)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(IsByteEntry ? 0 : 1));
+
+ // Multiply the steps by 4 and add to the already materialized base label
+ // address.
+ EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(DestReg)
+ .addReg(ScratchReg)
+ .addImm(2));
+}
+
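
A self-contained C++ model of what the three instructions above compute at
run time, with hypothetical names (an illustration, not commit code):

    #include <cstdint>

    // adr  xDest, Lanchor                  ; address of lowest destination
    // ldrb wScratch, [xTable, xEntry]      ; (ldrh with "lsl #1" for 2-byte)
    // add  xDest, xDest, xScratch, lsl #2  ; steps * 4 bytes
    uint64_t smallJumpTableDest(uint64_t AnchorAddr, const uint8_t *Table,
                                uint64_t Entry) {
      uint64_t Steps = Table[Entry];    // unsigned offset in instructions
      return AnchorAddr + (Steps << 2); // each step is one 4-byte instruction
    }
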
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -662,6 +768,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ case AArch64::JumpTableDest32: {
+ // We want:
+ // ldrsw xScratch, [xTable, xEntry, lsl #2]
+ // add xDest, xTable, xScratch
+ unsigned DestReg = MI->getOperand(0).getReg(),
+ ScratchReg = MI->getOperand(1).getReg(),
+ TableReg = MI->getOperand(2).getReg(),
+ EntryReg = MI->getOperand(3).getReg();
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+ .addReg(ScratchReg)
+ .addReg(TableReg)
+ .addReg(EntryReg)
+ .addImm(0)
+ .addImm(1));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+ .addReg(DestReg)
+ .addReg(TableReg)
+ .addReg(ScratchReg)
+ .addImm(0));
+ return;
+ }
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ LowerJumpTableDestSmall(*OutStreamer, *MI);
+ return;
+
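
For comparison, a hypothetical C++ model of the 32-bit lowering above, where
each entry is a signed byte offset relative to the table itself (again an
illustration, not commit code):

    #include <cstdint>

    // ldrsw xScratch, [xTable, xEntry, lsl #2]
    // add   xDest, xTable, xScratch
    uint64_t wideJumpTableDest(uint64_t TableAddr, const int32_t *Table,
                               uint64_t Entry) {
      int64_t Offset = Table[Entry]; // sign-extended, so the destination
      return TableAddr + Offset;     // may precede the table in memory
    }
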
case AArch64::FMOVH0:
case AArch64::FMOVS0:
case AArch64::FMOVD0:
diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 00000000000..0924a27e258
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//===-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineFunction *MF;
+ SmallVector<int, 8> BlockInfo;
+
+ int computeBlockSize(MachineBasicBlock &MBB);
+ void scanFunction();
+
+ bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+ static char ID;
+ AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+ initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+ StringRef getPassName() const override {
+ return "AArch64 Compress Jump Tables";
+ }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+ "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+ int Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ int Offset = 0;
+ for (MachineBasicBlock &MBB : *MF) {
+ BlockInfo[MBB.getNumber()] = Offset;
+ Offset += computeBlockSize(MBB);
+ }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+ int Offset) {
+ if (MI.getOpcode() != AArch64::JumpTableDest32)
+ return false;
+
+ int JTIdx = MI.getOperand(4).getIndex();
+ auto &JTInfo = *MF->getJumpTableInfo();
+ const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+ // The jump-table might have been optimized away.
+ if (JT.MBBs.empty())
+ return false;
+
+ int MaxOffset = std::numeric_limits<int>::min(),
+ MinOffset = std::numeric_limits<int>::max();
+ MachineBasicBlock *MinBlock = nullptr;
+ for (auto Block : JT.MBBs) {
+ int BlockOffset = BlockInfo[Block->getNumber()];
+ assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+ MaxOffset = std::max(MaxOffset, BlockOffset);
+ if (BlockOffset <= MinOffset) {
+ MinOffset = BlockOffset;
+ MinBlock = Block;
+ }
+ }
+
+ // The ADR instruction needed to calculate the address of the first reachable
+ // basic block can address +/-1MB.
+ if (!isInt<21>(MinOffset - Offset)) {
+ ++NumJT32;
+ return false;
+ }
+
+ int Span = MaxOffset - MinOffset;
+ auto AFI = MF->getInfo<AArch64FunctionInfo>();
+ if (isUInt<8>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest8));
+ ++NumJT8;
+ return true;
+ } else if (isUInt<16>(Span / 4)) {
+ AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+ MI.setDesc(TII->get(AArch64::JumpTableDest16));
+ ++NumJT16;
+ return true;
+ }
+
+ ++NumJT32;
+ return false;
+}
+
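
To make the decision rule above concrete: ADR reaches PC +/- 1MiB, i.e. a
signed 21-bit byte offset, and entry values count 4-byte instructions, so a
table qualifies for 1-byte entries when its destinations span at most
255 * 4 = 1020 bytes. A standalone sketch combining the reachability test
with the span check (hypothetical helper, not from the commit):

    #include <cstdint>

    // Entry size (1, 2 or 4 bytes) the pass would pick. MinOffsetFromAdr is
    // the distance from the ADR to the nearest destination; Span is the byte
    // distance between the lowest- and highest-addressed destinations.
    unsigned pickEntrySize(int64_t MinOffsetFromAdr, int64_t Span) {
      if (MinOffsetFromAdr < -(1 << 20) || MinOffsetFromAdr >= (1 << 20))
        return 4;                        // anchor unreachable by adr
      if (Span / 4 <= 0xFF)
        return 1;                        // JumpTableDest8
      if (Span / 4 <= 0xFFFF)
        return 2;                        // JumpTableDest16
      return 4;                          // stays JumpTableDest32
    }
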
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+ bool Changed = false;
+ MF = &MFIn;
+
+ const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+ TII = ST.getInstrInfo();
+
+ if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+ return false;
+
+ scanFunction();
+
+ for (MachineBasicBlock &MBB : *MF) {
+ int Offset = BlockInfo[MBB.getNumber()];
+ for (MachineInstr &MI : MBB) {
+ Changed |= compressJumpTable(MI, Offset);
+ Offset += TII->getInstSizeInBytes(MI);
+ }
+ }
+
+ return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+ return new AArch64CompressJumpTables();
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fea1531540f..c8227cd139a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::JumpTable, MVT::i64, Custom);
setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -2825,6 +2825,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSELECT_CC(Op, DAG);
case ISD::JumpTable:
return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT:
+ return LowerBR_JT(Op, DAG);
case ISD::ConstantPool:
return LowerConstantPool(Op, DAG);
case ISD::BlockAddress:
@@ -4902,6 +4904,22 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
return getAddr(JT, DAG);
}
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Jump-table entries are PC-relative offsets; no additional tweaking is
+ // necessary here. Just get the address of the jump table.
+ SDLoc DL(Op);
+ SDValue JT = Op.getOperand(1);
+ SDValue Entry = Op.getOperand(2);
+ int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+ SDNode *Dest =
+ DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+ Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+ return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+ SDValue(Dest, 0));
+}
+
SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
SelectionDAG &DAG) const {
ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 94df7e4c39d..3e89de665a7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -607,6 +607,7 @@ private:
SDValue TVal, SDValue FVal, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bbd734a542c..e6474046534 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -108,6 +108,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// This gets lowered to an instruction sequence which takes 16 bytes
NumBytes = 16;
break;
+ case AArch64::JumpTableDest32:
+ case AArch64::JumpTableDest16:
+ case AArch64::JumpTableDest8:
+ NumBytes = 12;
+ break;
+ case AArch64::SPACE:
+ NumBytes = MI.getOperand(1).getImm();
+ break;
}
return NumBytes;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1d9e3d0b812..24f6aaaab57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -448,6 +448,30 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
def : Pat<(AArch64LOADgot tconstpool:$addr),
(LOADgot tconstpool:$addr)>;
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+ (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+ Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+ [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+ Sched<[]>;
+
//===----------------------------------------------------------------------===//
// System instructions.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d1569..63c0ba2811e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -162,6 +162,19 @@ public:
unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
+ unsigned getJumpTableEntrySize(int Idx) const {
+ auto It = JumpTableEntryInfo.find(Idx);
+ if (It != JumpTableEntryInfo.end())
+ return It->second.first;
+ return 4;
+ }
+ MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+ return JumpTableEntryInfo.find(Idx)->second.second;
+ }
+ void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+ JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+ }
+
using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -200,6 +213,8 @@ private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
SetOfInstructions LOHRelated;
+
+ DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index abe1980740e..8bf7c165408 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -142,6 +142,7 @@ protected:
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool Force32BitJumpTables = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -292,6 +293,7 @@ public:
}
bool useRSqrt() const { return UseRSqrt; }
+ bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e183288d8df..fe2eea65ffe 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -123,6 +123,10 @@ static cl::opt<bool>
BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
cl::desc("Relax out of range conditional branches"));
+static cl::opt<bool> EnableCompressJumpTables(
+ "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Use smallest entry possible for jump tables"));
+
// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -158,6 +162,7 @@ extern "C" void LLVMInitializeAArch64Target() {
initializeAArch64AdvSIMDScalarPass(*PR);
initializeAArch64BranchTargetsPass(*PR);
initializeAArch64CollectLOHPass(*PR);
+ initializeAArch64CompressJumpTablesPass(*PR);
initializeAArch64ConditionalComparesPass(*PR);
initializeAArch64ConditionOptimizerPass(*PR);
initializeAArch64DeadRegisterDefinitionsPass(*PR);
@@ -546,6 +551,9 @@ void AArch64PassConfig::addPreEmitPass() {
if (EnableBranchTargets)
addPass(createAArch64BranchTargetsPass());
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ addPass(createAArch64CompressJumpTablesPass());
+
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index c57ebeb854c..58190686c79 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(AArch64CodeGen
AArch64FastISel.cpp
AArch64A53Fix835769.cpp
AArch64FrameLowering.cpp
+ AArch64CompressJumpTables.cpp
AArch64ConditionOptimizer.cpp
AArch64RedundantCopyElimination.cpp
AArch64ISelDAGToDAG.cpp