author     Tim Northover <tnorthover@apple.com>    2018-10-24 20:19:09 +0000
committer  Tim Northover <tnorthover@apple.com>    2018-10-24 20:19:09 +0000
commit     1c353419ab51f63235389b821c1e6ed02c3ccfb8
tree       20efe54b2cde1c403729f37ad13e137b7ff32c66 /llvm/lib
parent     769d4cebadf2ea8442093f156f5497dbe82a3737
AArch64: add a pass to compress jump-table entries when possible.
llvm-svn: 345188
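In the compressed form introduced here, each jump-table entry stores the distance from the lowest-addressed destination block, counted in 4-byte instructions. A 1-byte entry therefore covers a span of up to 255 * 4 = 1020 bytes of code, and a 2-byte entry up to 65535 * 4 = 262140 bytes. The following is a minimal C++ sketch of that encoding, with hypothetical helper names (this is editorial illustration, not code from the commit):

    #include <cassert>
    #include <cstdint>

    // Encode a destination as a compressed entry: its offset from the
    // lowest-addressed destination, counted in 4-byte instructions.
    uint64_t encodeEntry(uint64_t destAddr, uint64_t minDestAddr) {
      assert(destAddr % 4 == 0 && minDestAddr % 4 == 0 &&
             "AArch64 code is 4-byte aligned");
      return (destAddr - minDestAddr) >> 2;
    }

    // Decode at "run time": the inverse computed by the emitted
    // adr/ldr/add sequence described later in the patch.
    uint64_t decodeEntry(uint64_t entry, uint64_t minDestAddr) {
      return minDestAddr + (entry << 2);
    }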
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.h                      |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64.td                     |  13
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp          | 132
-rw-r--r--  llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp  | 162
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp        |  20
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.h          |   1
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp           |   8
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.td            |  24
-rw-r--r--  llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h   |  15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64Subtarget.h             |   2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetMachine.cpp       |   8
-rw-r--r--  llvm/lib/Target/AArch64/CMakeLists.txt                 |   1

12 files changed, 384 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 6472dcd5157..2f0d0bf346d 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -32,6 +32,7 @@ class MachineFunctionPass;
 FunctionPass *createAArch64DeadRegisterDefinitions();
 FunctionPass *createAArch64RedundantCopyEliminationPass();
 FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
 FunctionPass *createAArch64ConditionalCompares();
 FunctionPass *createAArch64AdvSIMDScalar();
 FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -62,6 +63,7 @@ void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
 void initializeAArch64BranchTargetsPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
 void initializeAArch64ConditionOptimizerPass(PassRegistry&);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 368898fd1e6..de78ca5b257 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -180,6 +180,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
 
+def FeatureForce32BitJumpTables
+    : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+                       "Force jump table entries to be 32-bits wide except at MinSize">;
+
 def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
                                    "Enable support for RCPC extension">;
 
@@ -411,7 +415,8 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
                                     FeatureUseRSqrt,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2 processors",
@@ -425,7 +430,8 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     "Samsung Exynos-M3 processors",
@@ -442,7 +448,8 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeaturePredictableSelectIsExpensive,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
                                 "Qualcomm Kryo processors", [
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 23b6a65555a..b1375c969d9 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -31,6 +31,8 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +79,12 @@ public:
     return MCInstLowering.lowerOperand(MO, MCOp);
   }
 
+  void EmitJumpTableInfo() override;
+  void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                          const MachineBasicBlock *MBB, unsigned JTI);
+
+  void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                      const MachineInstr &MI);
   void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -433,6 +441,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   printOperand(MI, NOps - 2, OS);
 }
 
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI) return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+  OutStreamer->SwitchSection(ReadOnlySec);
+
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    unsigned Size = AFI->getJumpTableEntrySize(JTI);
+    EmitAlignment(Log2_32(Size));
+    OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+    for (auto *JTBB : JTBBs)
+      emitJumpTableEntry(MJTI, JTBB, JTI);
+  }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                           const MachineBasicBlock *MBB,
+                                           unsigned JTI) {
+  const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+  if (Size == 4) {
+    // .word LBB - LJTI
+    const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+    const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+  } else {
+    // .byte (LBB - LBBmin) >> 2 (or .hword for 2-byte entries)
+    const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+    const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+    Value = MCBinaryExpr::createLShr(
+        Value, MCConstantExpr::create(2, OutContext), OutContext);
+  }
+
+  OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further
+/// compressed by counting in instructions rather than bytes (i.e. divided by
+/// 4). So, to materialize the correct destination we need:
+///
+///   adr xDest, .LBB0_0
+///   ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+///   add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+                                                const llvm::MachineInstr &MI) {
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned ScratchRegW =
+      STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+  unsigned TableReg = MI.getOperand(2).getReg();
+  unsigned EntryReg = MI.getOperand(3).getReg();
+  int JTIdx = MI.getOperand(4).getIndex();
+  bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+  // This has to be first because the compression pass bases its reachability
+  // calculations on the start of the JumpTableDest instruction.
+  auto Label =
+      MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+                                  .addReg(DestReg)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      Label, MF->getContext())));
+
+  // Load the number of instruction-steps to offset from the label.
+  unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+  EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+                                  .addReg(ScratchRegW)
+                                  .addReg(TableReg)
+                                  .addReg(EntryReg)
+                                  .addImm(0)
+                                  .addImm(IsByteEntry ? 0 : 1));
+
+  // Multiply the steps by 4 and add to the already materialized base label
+  // address.
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                  .addReg(DestReg)
+                                  .addReg(DestReg)
+                                  .addReg(ScratchReg)
+                                  .addImm(2));
+}
+
 void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                                       const MachineInstr &MI) {
   unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
@@ -662,6 +768,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case AArch64::JumpTableDest32: {
+    // We want:
+    //     ldrsw xScratch, [xTable, xEntry, lsl #2]
+    //     add xDest, xTable, xScratch
+    unsigned DestReg = MI->getOperand(0).getReg(),
+             ScratchReg = MI->getOperand(1).getReg(),
+             TableReg = MI->getOperand(2).getReg(),
+             EntryReg = MI->getOperand(3).getReg();
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+                                     .addReg(ScratchReg)
+                                     .addReg(TableReg)
+                                     .addReg(EntryReg)
+                                     .addImm(0)
+                                     .addImm(1));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                     .addReg(DestReg)
+                                     .addReg(TableReg)
+                                     .addReg(ScratchReg)
+                                     .addImm(0));
+    return;
+  }
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    LowerJumpTableDestSmall(*OutStreamer, *MI);
+    return;
+
   case AArch64::FMOVH0:
   case AArch64::FMOVS0:
   case AArch64::FMOVD0:
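The two pseudo-expansions above have simple run-time semantics. The following is a hedged C++ model of what each emitted sequence computes, with made-up function and parameter names standing in for the registers in the comments (illustrative only, not part of the patch):

    #include <cstdint>

    // JumpTableDest32:  ldrsw xScratch, [xTable, xEntry, lsl #2]
    //                   add   xDest, xTable, xScratch
    // The table itself is the PC-relative base; entries are signed words.
    uint64_t jumpTableDest32(const int32_t *table, uint64_t entry) {
      int64_t scratch = table[entry];  // sign-extending load
      return reinterpret_cast<uint64_t>(table) + scratch;
    }

    // JumpTableDest8:  adr  xDest, Lbase
    //                  ldrb wScratch, [xTable, xEntry]
    //                  add  xDest, xDest, xScratch, lsl #2
    // Lbase is the lowest-addressed destination recorded by the pass.
    uint64_t jumpTableDest8(const uint8_t *table, uint64_t entry,
                            uint64_t baseLabelAddr) {
      uint64_t steps = table[entry];  // instruction-steps from Lbase
      return baseLabelAddr + (steps << 2);
    }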
diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
new file mode 100644
index 00000000000..0924a27e258
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineFunction *MF;
+  SmallVector<int, 8> BlockInfo;
+
+  int computeBlockSize(MachineBasicBlock &MBB);
+  void scanFunction();
+
+  bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+  static char ID;
+  AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+    initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+  StringRef getPassName() const override {
+    return "AArch64 Compress Jump Tables";
+  }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+                "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+  int Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->getInstSizeInBytes(MI);
+  return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  int Offset = 0;
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo[MBB.getNumber()] = Offset;
+    Offset += computeBlockSize(MBB);
+  }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+                                                  int Offset) {
+  if (MI.getOpcode() != AArch64::JumpTableDest32)
+    return false;
+
+  int JTIdx = MI.getOperand(4).getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+  // The jump-table might have been optimized away.
+  if (JT.MBBs.empty())
+    return false;
+
+  int MaxOffset = std::numeric_limits<int>::min(),
+      MinOffset = std::numeric_limits<int>::max();
+  MachineBasicBlock *MinBlock = nullptr;
+  for (auto Block : JT.MBBs) {
+    int BlockOffset = BlockInfo[Block->getNumber()];
+    assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+    MaxOffset = std::max(MaxOffset, BlockOffset);
+    if (BlockOffset <= MinOffset) {
+      MinOffset = BlockOffset;
+      MinBlock = Block;
+    }
+  }
+
+  // The ADR instruction needed to calculate the address of the first reachable
+  // basic block can address +/-1MB.
+  if (!isInt<21>(MinOffset - Offset)) {
+    ++NumJT32;
+    return false;
+  }
+
+  int Span = MaxOffset - MinOffset;
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  if (isUInt<8>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest8));
+    ++NumJT8;
+    return true;
+  } else if (isUInt<16>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest16));
+    ++NumJT16;
+    return true;
+  }
+
+  ++NumJT32;
+  return false;
+}
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+  bool Changed = false;
+  MF = &MFIn;
+
+  const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+  TII = ST.getInstrInfo();
+
+  if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+    return false;
+
+  scanFunction();
+
+  for (MachineBasicBlock &MBB : *MF) {
+    int Offset = BlockInfo[MBB.getNumber()];
+    for (MachineInstr &MI : MBB) {
+      Changed |= compressJumpTable(MI, Offset);
+      Offset += TII->getInstSizeInBytes(MI);
+    }
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+  return new AArch64CompressJumpTables();
+}
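The size decision in compressJumpTable reduces to arithmetic on block-layout offsets. A rough standalone restatement in C++, with assumed names and none of the MachineFunction plumbing (a sketch, not the pass itself):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    // Pick an entry size (1, 2, or 4 bytes) from the layout offsets of a
    // table's destination blocks and of the JumpTableDest instruction itself.
    unsigned pickEntrySize(const std::vector<int> &blockOffsets, int instOffset) {
      int minOff = *std::min_element(blockOffsets.begin(), blockOffsets.end());
      int maxOff = *std::max_element(blockOffsets.begin(), blockOffsets.end());

      // ADR reaches +/-1MB (a signed 21-bit byte offset) from the instruction.
      int64_t adrDelta = int64_t(minOff) - instOffset;
      if (adrDelta < -(1 << 20) || adrDelta >= (1 << 20))
        return 4;

      int span = maxOff - minOff;  // all offsets are 4-byte aligned
      if (span / 4 <= 0xFF)        // mirrors isUInt<8>(Span / 4)
        return 1;
      if (span / 4 <= 0xFFFF)      // mirrors isUInt<16>(Span / 4)
        return 2;
      return 4;
    }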
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fea1531540f..c8227cd139a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -187,7 +187,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
-  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+  setOperationAction(ISD::BR_JT, MVT::Other, Custom);
   setOperationAction(ISD::JumpTable, MVT::i64, Custom);
 
   setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
@@ -2825,6 +2825,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerSELECT_CC(Op, DAG);
   case ISD::JumpTable:
     return LowerJumpTable(Op, DAG);
+  case ISD::BR_JT:
+    return LowerBR_JT(Op, DAG);
   case ISD::ConstantPool:
     return LowerConstantPool(Op, DAG);
   case ISD::BlockAddress:
@@ -4902,6 +4904,22 @@ SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
   return getAddr(JT, DAG);
 }
 
+SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
+                                          SelectionDAG &DAG) const {
+  // Jump table entries are PC-relative offsets. No additional tweaking is
+  // necessary here; just get the address of the jump table.
+  SDLoc DL(Op);
+  SDValue JT = Op.getOperand(1);
+  SDValue Entry = Op.getOperand(2);
+  int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
+
+  SDNode *Dest =
+      DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
+                         Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
+  return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
+                     SDValue(Dest, 0));
+}
+
 SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
                                                  SelectionDAG &DAG) const {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 94df7e4c39d..3e89de665a7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -607,6 +607,7 @@ private:
                          SDValue TVal, SDValue FVal, const SDLoc &dl,
                          SelectionDAG &DAG) const;
   SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index bbd734a542c..e6474046534 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -108,6 +108,14 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     // This gets lowered to an instruction sequence which takes 16 bytes
     NumBytes = 16;
     break;
+  case AArch64::JumpTableDest32:
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    NumBytes = 12;
+    break;
+  case AArch64::SPACE:
+    NumBytes = MI.getOperand(1).getImm();
+    break;
   }
 
   return NumBytes;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 1d9e3d0b812..24f6aaaab57 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -448,6 +448,30 @@ def : Pat<(AArch64LOADgot texternalsym:$addr),
 def : Pat<(AArch64LOADgot tconstpool:$addr),
           (LOADgot tconstpool:$addr)>;
 
+// 32-bit jump table destination is actually only 2 instructions since we can
+// use the table itself as a PC-relative base. But optimization occurs after
+// branch relaxation, so be pessimistic.
+let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch" in {
+def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                      Sched<[]>;
+def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
+                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
+                     Sched<[]>;
+}
+
+// Space-consuming pseudo to aid testing of placement and reachability
+// algorithms. Immediate operand is the number of bytes this "instruction"
+// occupies; register operands can be used to enforce dependency and constrain
+// the scheduler.
+let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
+def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
+                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
+            Sched<[]>;
+
 //===----------------------------------------------------------------------===//
 // System instructions.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index e42214d1569..63c0ba2811e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -162,6 +162,19 @@ public:
   unsigned getVarArgsFPRSize() const { return VarArgsFPRSize; }
   void setVarArgsFPRSize(unsigned Size) { VarArgsFPRSize = Size; }
 
+  unsigned getJumpTableEntrySize(int Idx) const {
+    auto It = JumpTableEntryInfo.find(Idx);
+    if (It != JumpTableEntryInfo.end())
+      return It->second.first;
+    return 4;
+  }
+  MCSymbol *getJumpTableEntryPCRelSymbol(int Idx) const {
+    return JumpTableEntryInfo.find(Idx)->second.second;
+  }
+  void setJumpTableEntryInfo(int Idx, unsigned Size, MCSymbol *PCRelSym) {
+    JumpTableEntryInfo[Idx] = std::make_pair(Size, PCRelSym);
+  }
+
   using SetOfInstructions = SmallPtrSet<const MachineInstr *, 16>;
 
   const SetOfInstructions &getLOHRelated() const { return LOHRelated; }
@@ -200,6 +213,8 @@ private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
   SetOfInstructions LOHRelated;
+
+  DenseMap<int, std::pair<unsigned, MCSymbol *>> JumpTableEntryInfo;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index abe1980740e..8bf7c165408 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -142,6 +142,7 @@ protected:
   bool HasFuseLiterals = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
+  bool Force32BitJumpTables = false;
   uint8_t MaxInterleaveFactor = 2;
   uint8_t VectorInsertExtractBaseCost = 3;
   uint16_t CacheLineSize = 0;
@@ -292,6 +293,7 @@ public:
   }
 
   bool useRSqrt() const { return UseRSqrt; }
+  bool force32BitJumpTables() const { return Force32BitJumpTables; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
     return VectorInsertExtractBaseCost;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index e183288d8df..fe2eea65ffe 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -123,6 +123,10 @@ static cl::opt<bool>
 BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                  cl::desc("Relax out of range conditional branches"));
 
+static cl::opt<bool> EnableCompressJumpTables(
+    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
+    cl::desc("Use smallest entry possible for jump tables"));
+
 // FIXME: Unify control over GlobalMerge.
 static cl::opt<cl::boolOrDefault>
 EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
@@ -158,6 +162,7 @@ extern "C" void LLVMInitializeAArch64Target() {
   initializeAArch64AdvSIMDScalarPass(*PR);
   initializeAArch64BranchTargetsPass(*PR);
   initializeAArch64CollectLOHPass(*PR);
+  initializeAArch64CompressJumpTablesPass(*PR);
   initializeAArch64ConditionalComparesPass(*PR);
   initializeAArch64ConditionOptimizerPass(*PR);
   initializeAArch64DeadRegisterDefinitionsPass(*PR);
@@ -546,6 +551,9 @@ void AArch64PassConfig::addPreEmitPass() {
   if (EnableBranchTargets)
     addPass(createAArch64BranchTargetsPass());
 
+  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+    addPass(createAArch64CompressJumpTablesPass());
+
   if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
       TM->getTargetTriple().isOSBinFormatMachO())
     addPass(createAArch64CollectLOHPass());
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
index c57ebeb854c..58190686c79 100644
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -34,6 +34,7 @@ add_llvm_target(AArch64CodeGen
   AArch64FastISel.cpp
   AArch64A53Fix835769.cpp
   AArch64FrameLowering.cpp
+  AArch64CompressJumpTables.cpp
   AArch64ConditionOptimizer.cpp
   AArch64RedundantCopyElimination.cpp
   AArch64ISelDAGToDAG.cpp
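Usage note: both knobs added by this patch are visible above. The pass runs by default at -O1 and higher and can be switched off with the hidden llc option -aarch64-enable-compress-jump-tables=false, while the force-32bit-jump-tables subtarget feature (enabled for the Exynos-M cores in AArch64.td) keeps 4-byte entries except when the function is optimized for minimum size.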