author     Tim Northover <tnorthover@apple.com>    2018-10-24 20:19:09 +0000
committer  Tim Northover <tnorthover@apple.com>    2018-10-24 20:19:09 +0000
commit     1c353419ab51f63235389b821c1e6ed02c3ccfb8
tree       20efe54b2cde1c403729f37ad13e137b7ff32c66 /llvm/lib
parent     769d4cebadf2ea8442093f156f5497dbe82a3737
AArch64: add a pass to compress jump-table entries when possible.
llvm-svn: 345188
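In the compressed form introduced here, each jump-table entry stores the distance from the lowest-addressed destination block, counted in 4-byte instructions. A 1-byte entry therefore covers a span of up to 255 * 4 = 1020 bytes of code, and a 2-byte entry up to 65535 * 4 = 262140 bytes. The following is a minimal C++ sketch of that encoding, with hypothetical helper names (this is editorial illustration, not code from the commit):

    #include <cassert>
    #include <cstdint>

    // Encode a destination as a compressed entry: its offset from the
    // lowest-addressed destination, counted in 4-byte instructions.
    uint64_t encodeEntry(uint64_t destAddr, uint64_t minDestAddr) {
      assert(destAddr % 4 == 0 && minDestAddr % 4 == 0 &&
             "AArch64 code is 4-byte aligned");
      return (destAddr - minDestAddr) >> 2;
    }

    // Decode at "run time": the inverse computed by the emitted
    // adr/ldr/add sequence described later in the patch.
    uint64_t decodeEntry(uint64_t entry, uint64_t minDestAddr) {
      return minDestAddr + (entry << 2);
    }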
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.h                      |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td                     |  13
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp          | 132
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp  | 162
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp        |  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h          |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp           |   8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td            |  24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h   |  15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h             |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp       |   8
-rw-r--r--  llvm/lib/Target/AArch64/CMakeLists.txt                 |   1

12 files changed, 384 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 6472dcd5157..2f0d0bf346d 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -32,6 +32,7 @@ class MachineFunctionPass;
 FunctionPass *createAArch64DeadRegisterDefinitions();
 FunctionPass *createAArch64RedundantCopyEliminationPass();
 FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
 FunctionPass *createAArch64ConditionalCompares();
 FunctionPass *createAArch64AdvSIMDScalar();
 FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -62,6 +63,7 @@ void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
 void initializeAArch64BranchTargetsPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
 void initializeAArch64ConditionOptimizerPass(PassRegistry&);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 368898fd1e6..de78ca5b257 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -180,6 +180,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
 
+def FeatureForce32BitJumpTables
+    : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+                       "Force jump table entries to be 32-bits wide except at MinSize">;
+
 def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
                                    "Enable support for RCPC extension">;
 
@@ -411,7 +415,8 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
                                     FeatureUseRSqrt,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2 processors",
@@ -425,7 +430,8 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     "Samsung Exynos-M3 processors",
@@ -442,7 +448,8 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeaturePredictableSelectIsExpensive,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
                                 "Qualcomm Kryo processors", [
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 23b6a65555a..b1375c969d9 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -31,6 +31,8 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +79,12 @@ public:
     return MCInstLowering.lowerOperand(MO, MCOp);
   }
 
+  void EmitJumpTableInfo() override;
+  void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                          const MachineBasicBlock *MBB, unsigned JTI);
+
+  void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                      const MachineInstr &MI);
   void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -433,6 +441,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   printOperand(MI, NOps - 2, OS);
 }
 
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI) return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+  OutStreamer->SwitchSection(ReadOnlySec);
+
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    unsigned Size = AFI->getJumpTableEntrySize(JTI);
+    EmitAlignment(Log2_32(Size));
+    OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+    for (auto *JTBB : JTBBs)
+      emitJumpTableEntry(MJTI, JTBB, JTI);
+  }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                           const MachineBasicBlock *MBB,
+                                           unsigned JTI) {
+  const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+  if (Size == 4) {
+    // .word LBB - LJTI
+    const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+    const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+  } else {
+    // .byte (LBB - LBBmin) >> 2 (or .hword for 2-byte entries)
+    const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+    const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+    Value = MCBinaryExpr::createLShr(
+        Value, MCConstantExpr::create(2, OutContext), OutContext);
+  }
+
+  OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further
+/// compressed by counting in instructions rather than bytes (i.e. divided by
+/// 4). So, to materialize the correct destination we need:
+///
+///   adr xDest, .LBB0_0
+///   ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+///   add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+                                                const llvm::MachineInstr &MI) {
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned ScratchRegW =
+      STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+  unsigned TableReg = MI.getOperand(2).getReg();
+  unsigned EntryReg = MI.getOperand(3).getReg();
+  int JTIdx = MI.getOperand(4).getIndex();
+  bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+  // This has to be first because the compression pass bases its reachability
+  // calculations on the start of the JumpTableDest instruction.
+  auto Label =
+      MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+                                  .addReg(DestReg)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      Label, MF->getContext())));
+
+  // Load the number of instruction-steps to offset from the label.
+  unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+  EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+                                  .addReg(ScratchRegW)
+                                  .addReg(TableReg)
+                                  .addReg(EntryReg)
+                                  .addImm(0)
+                                  .addImm(IsByteEntry ? 0 : 1));
+
+  // Multiply the steps by 4 and add to the already materialized base label
+  // address.
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                  .addReg(DestReg)
+                                  .addReg(DestReg)
+                                  .addReg(ScratchReg)
+                                  .addImm(2));
+}
+
 void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                                       const MachineInstr &MI) {
   unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -662,6 +768,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case AArch64::JumpTableDest32: {
+    // We want:
+    //     ldrsw xScratch, [xTable, xEntry, lsl #2]
+    //     add xDest, xTable, xScratch
+    unsigned DestReg = MI->getOperand(0).getReg(),
+             ScratchReg = MI->getOperand(1).getReg(),
+             TableReg = MI->getOperand(2).getReg(),
+             EntryReg = MI->getOperand(3).getReg();
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+                                     .addReg(ScratchReg)
+                                     .addReg(TableReg)
+                                     .addReg(EntryReg)
+                                     .addImm(0)
+                                     .addImm(1));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                     .addReg(DestReg)
+                                     .addReg(TableReg)
+                                     .addReg(ScratchReg)
+                                     .addImm(0));
+    return;
+  }
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    LowerJumpTableDestSmall(*OutStreamer, *MI);
+    return;
+
   case AArch64::FMOVH0:
   case AArch64::FMOVS0:
   case AArch64::FMOVD0:
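The two pseudo-expansions above have simple run-time semantics. The following is a hedged C++ model of what each emitted sequence computes, with made-up function and parameter names standing in for the registers in the comments (illustrative only, not part of the patch):

    #include <cstdint>

    // JumpTableDest32:  ldrsw xScratch, [xTable, xEntry, lsl #2]
    //                   add   xDest, xTable, xScratch
    // The table itself is the PC-relative base; entries are signed words.
    uint64_t jumpTableDest32(const int32_t *table, uint64_t entry) {
      int64_t scratch = table[entry];  // sign-extending load
      return reinterpret_cast<uint64_t>(table) + scratch;
    }

    // JumpTableDest8:  adr  xDest, Lbase
    //                  ldrb wScratch, [xTable, xEntry]
    //                  add  xDest, xDest, xScratch, lsl #2
    // Lbase is the lowest-addressed destination recorded by the pass.
    uint64_t jumpTableDest8(const uint8_t *table, uint64_t entry,
                            uint64_t baseLabelAddr) {
      uint64_t steps = table[entry];  // instruction-steps from Lbase
      return baseLabelAddr + (steps << 2);
    }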
diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 00000000000..0924a27e258
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineFunction *MF;
+  SmallVector<int, 8> BlockInfo;
+
+  int computeBlockSize(MachineBasicBlock &MBB);
+  void scanFunction();
+
+  bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+  static char ID;
+  AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+    initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+  StringRef getPassName() const override {
+    return "AArch64 Compress Jump Tables";
+  }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+                "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+  int Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->getInstSizeInBytes(MI);
+  return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  int Offset = 0;
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo[MBB.getNumber()] = Offset;
+    Offset += computeBlockSize(MBB);
+  }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+                                                  int Offset) {
+  if (MI.getOpcode() != AArch64::JumpTableDest32)
+    return false;
+
+  int JTIdx = MI.getOperand(4).getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+  // The jump-table might have been optimized away.
+  if (JT.MBBs.empty())
+    return false;
+
+  int MaxOffset = std::numeric_limits<int>::min(),
+      MinOffset = std::numeric_limits<int>::max();
+  MachineBasicBlock *MinBlock = nullptr;
+  for (auto Block : JT.MBBs) {
+    int BlockOffset = BlockInfo[Block->getNumber()];
+    assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+    MaxOffset = std::max(MaxOffset, BlockOffset);
+    if (BlockOffset <= MinOffset) {
+      MinOffset = BlockOffset;
+      MinBlock = Block;
+    }
+  }
+
+  // The ADR instruction needed to calculate the address of the first reachable
+  // basic block can address +/-1MB.
+  if (!isInt<21>(MinOffset - Offset)) {
+    ++NumJT32;
+    return false;
+  }
+
+  int Span = MaxOffset - MinOffset;
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  if (isUInt<8>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest8));
+    ++NumJT8;
+    return true;
+  } else if (isUInt<16>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest16));
+    ++NumJT16;
+    return true;
+  }
+
+  ++NumJT32;
+  return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+  bool Changed = false;
+  MF = &MFIn;
+
+  const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+  TII = ST.getInstrInfo();
+
+  if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+    return false;
+
+  scanFunction();
+
+  for (MachineBasicBlock &MBB : *MF) {
+    int Offset = BlockInfo[MBB.getNumber()];
+    for (MachineInstr &MI : MBB) {
+      Changed |= compressJumpTable(MI, Offset);
+      Offset += TII->getInstSizeInBytes(MI);
+    }
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+  return new AArch64CompressJumpTables();
+}
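The size decision in compressJumpTable reduces to arithmetic on block-layout offsets. A rough standalone restatement in C++, with assumed names and none of the MachineFunction plumbing (a sketch, not the pass itself):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Pick an entry size (1, 2, or 4 bytes) from the layout offsets of a
    // table's destination blocks and of the JumpTableDest instruction itself.
    unsigned pickEntrySize(const std::vector<int> &blockOffsets, int instOffset) {
      int minOff = *std::min_element(blockOffsets.begin(), blockOffsets.end());
      int maxOff = *std::max_element(blockOffsets.begin(), blockOffsets.end());

      // ADR reaches +/-1MB (a signed 21-bit byte offset) from the instruction.
      int64_t adrDelta = int64_t(minOff) - instOffset;
      if (adrDelta < -(1 << 20) || adrDelta >= (1 << 20))
        return 4;

      int span = maxOff - minOff;  // all offsets are 4-byte aligned
      if (span / 4 <= 0xFF)        // mirrors isUInt<8>(Span / 4)
        return 1;
      if (span / 4 <= 0xFFFF)      // mirrors isUInt<16>(Span / 4)
        return 2;
      return 4;
    }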
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fea1531540f..c8227cd139a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
   setOperationAction(ISD::JumpTable, MVT::i64, Custom);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -2825,6 +2825,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerSELECT_CC(Op, DAG);
   case ISD::JumpTable:
     return LowerJumpTable(Op, DAG);
+  case ISD::BR_JT:
+    return LowerBR_JT(Op, DAG);
   case ISD::ConstantPool:
     return LowerConstantPool(Op, DAG);
   case ISD::BlockAddress:
@@ -4902,6 +4904,22 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
   return getAddr(JT, DAG);
 }
 
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  // Jump table entries are PC-relative offsets. No additional tweaking is
+  // necessary here; just get the address of the jump table.
+  SDLoc DL(Op);
+  SDValue JT = Op.getOperand(1);
+  SDValue Entry = Op.getOperand(2);
+  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+  SDNode *Dest =
+      DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+                         Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+                     SDValue(Dest, 0));
+}
+
 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                  SelectionDAG &DAG) const {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 94df7e4c39d..3e89de665a7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -607,6 +607,7 @@ private:
                          SDValue TVal, SDValue FVal, const SDLoc &dl,
                          SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bbd734a542c..e6474046534 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -108,6 +108,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
     break;
+  case AArch64::JumpTableDest32:
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    NumBytes = 12;
+    break;
+  case AArch64::SPACE:
+    NumBytes = MI.getOperand(1).getImm();
+    break;
   }
 
   return NumBytes;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1d9e3d0b812..24f6aaaab57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -448,6 +448,30 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
 def : Pat<(AArch64LOADgot tconstpool:$addr),
           (LOADgot tconstpool:$addr)>;
 
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation, so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                     Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+            Sched<[]>;
+
 //===----------------------------------------------------------------------===//
 // System instructions.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d1569..63c0ba2811e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -162,6 +162,19 @@ public:
   unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
   void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
 
+  unsigned getJumpTableEntrySize(int Idx) const {
+    auto It = JumpTableEntryInfo.find(Idx);
+    if (It != JumpTableEntryInfo.end())
+      return It->second.first;
+    return 4;
+  }
+  MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+    return JumpTableEntryInfo.find(Idx)->second.second;
+  }
+  void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+    JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+  }
+
   using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
 
   const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -200,6 +213,8 @@ private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
   SetOfInstructions LOHRelated;
+
+  DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index abe1980740e..8bf7c165408 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -142,6 +142,7 @@ protected:
   bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
+  bool Force32BitJumpTables = false;
   uint8_t MaxInterleaveFactor = 2;
   uint8_t VectorInsertExtractBaseCost = 3;
   uint16_t CacheLineSize = 0;
@@ -292,6 +293,7 @@ public:
   }
 
   bool useRSqrt() const { return UseRSqrt; }
+  bool force32BitJumpTables() const { return Force32BitJumpTables; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
     return VectorInsertExtractBaseCost;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e183288d8df..fe2eea65ffe 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -123,6 +123,10 @@ static cl::opt<bool>
 BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                  cl::desc("Relax out of range conditional branches"));
 
+static cl::opt<bool> EnableCompressJumpTables(
+    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+    cl::desc("Use smallest entry possible for jump tables"));
+
 // FIXME: Unify control over GlobalMerge.
 static cl::opt<cl::boolOrDefault>
 EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -158,6 +162,7 @@ extern "C" void LLVMInitializeAArch64Target() {
   initializeAArch64AdvSIMDScalarPass(*PR);
   initializeAArch64BranchTargetsPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
+  initializeAArch64CompressJumpTablesPass(*PR);
   initializeAArch64ConditionalComparesPass(*PR);
   initializeAArch64ConditionOptimizerPass(*PR);
   initializeAArch64DeadRegisterDefinitionsPass(*PR);
@@ -546,6 +551,9 @@ void AArch64PassConfig::addPreEmitPass() {
   if (EnableBranchTargets)
     addPass(createAArch64BranchTargetsPass());
 
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+    addPass(createAArch64CompressJumpTablesPass());
+
   if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
       TM->getTargetTriple().isOSBinFormatMachO())
     addPass(createAArch64CollectLOHPass());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index c57ebeb854c..58190686c79 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(AArch64CodeGen
   AArch64FastISel.cpp
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
+  AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
   AArch64ISelDAGToDAG.cpp
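Usage note: both knobs added by this patch are visible above. The pass runs by default at -O1 and higher and can be switched off with the hidden llc option -aarch64-enable-compress-jump-tables=false, while the force-32bit-jump-tables subtarget feature (enabled for the Exynos-M cores in AArch64.td) keeps 4-byte entries except when the function is optimized for minimum size.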